diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cc b/paddle/fluid/operators/collective/c_broadcast_op.cc
index e53794444ecc8..f1672f6dd04b0 100644
--- a/paddle/fluid/operators/collective/c_broadcast_op.cc
+++ b/paddle/fluid/operators/collective/c_broadcast_op.cc
@@ -57,13 +57,3 @@ namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(c_broadcast,
                              ops::CBroadcastOp,
                              ops::CBroadcastOpMaker);
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          CPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpCPUKernel,
-                          float,
-                          double,
-                          int,
-                          int64_t,
-                          phi::dtype::float16) {}
diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
deleted file mode 100644
index 069de3efe0d6c..0000000000000
--- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/phi/core/distributed/comm_context_manager.h"
-#include "paddle/phi/core/distributed/nccl_comm_context.h"
-
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-#include "paddle/fluid/distributed/collective/process_group.h"
-#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
-#include "paddle/phi/core/platform/collective_helper.h"
-#endif
-#include "paddle/phi/api/include/tensor.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class CBroadcastOpCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
-    auto x = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-
-    int rid = ctx.Attr<int>("ring_id");
-    const auto& place = ctx.GetPlace();
-    ctx.device_context().Alloc<T>(out);
-
-    int root = ctx.Attr<int>("root");
-
-    auto map = distributed::ProcessGroupMapFromGid::getInstance();
-    if (map->has(rid)) {
-      distributed::ProcessGroup* pg = map->get(rid);
-      auto b_opts = distributed::BroadcastOptions();
-      b_opts.source_rank = rid;
-      b_opts.source_root = root;
-      auto task = pg->Broadcast(out, *x, b_opts, false);
-      task->Wait();
-      return;
-    }
-
-    gpuStream_t stream = ctx.cuda_device_context().stream();
-    const auto& comm_context_manager =
-        phi::distributed::CommContextManager::GetInstance();
-    if (comm_context_manager.Has(std::to_string(rid))) {
-      auto* comm_context = static_cast<phi::distributed::NCCLCommContext*>(
-          comm_context_manager.Get(std::to_string(rid)));
-
-      comm_context->Broadcast(out, *x, root, stream);
-    } else {
-      // NOTE(liyurui): This will be removed after moving this operator to phi.
-      int numel = x->numel();
-      ncclDataType_t dtype = phi::ToNCCLDataType(x->dtype());
-      auto comm = platform::NCCLCommContext::Instance().Get(rid, place);
-      if (root == comm->rank()) {
-        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBcast(
-            reinterpret_cast<void*>(const_cast<T*>(x->data<T>())),
-            numel,
-            dtype,
-            root,
-            comm->comm(),
-            stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. sent "
-                << x->numel();
-
-        if (out != x) {
-          framework::TensorCopy(*static_cast<const phi::DenseTensor*>(x),
-                                place,
-                                *phi::DeviceContextPool::Instance().Get(place),
-                                static_cast<phi::DenseTensor*>(out));
-        }
-      } else {
-        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBcast(
-            out->data<T>(), numel, dtype, root, comm->comm(), stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. received "
-                << common::product(out->dims());
-      }
-    }
-
-    out->set_lod(x->lod());
-#else
-    PADDLE_THROW(common::errors::PreconditionNotMet(
-        "PaddlePaddle should compile with GPU."));
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          GPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpCUDAKernel,
-                          int,
-                          int64_t,
-                          float,
-                          double,
-#if (NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000) || \
-    defined(PADDLE_WITH_HIP)
-                          phi::dtype::bfloat16,
-#endif
-                          phi::dtype::float16) {
-}
diff --git a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc b/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc
deleted file mode 100644
index 3192e122f3791..0000000000000
--- a/paddle/fluid/operators/collective/c_broadcast_op_xpu.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/collective/c_broadcast_op.h"
-#include "paddle/phi/core/distributed/comm_context_manager.h"
-
-#ifdef PADDLE_WITH_XPU_BKCL
-#include "paddle/common/flags.h"
-#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
-#include "paddle/phi/core/distributed/bkcl_comm_context.h"
-#include "paddle/phi/core/platform/collective_helper.h"
-COMMON_DECLARE_bool(dynamic_static_unified_comm);
-#endif
-#include "paddle/fluid/distributed/collective/process_group.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T, typename DeviceContext>
-class CBroadcastOpXPUKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-#if defined(PADDLE_WITH_XPU_BKCL)
-    auto x = ctx.Input<phi::DenseTensor>("X");
-    auto out = ctx.Output<phi::DenseTensor>("Out");
-    size_t numel = x->numel();
-
-    BKCLDataType dtype = phi::ToBKCLDataType(x->dtype());
-    int ring_id = ctx.Attr<int>("ring_id");
-    auto place = ctx.GetPlace();
-    int root = ctx.Attr<int>("root");
-
-    platform::BKCLComm* comm = nullptr;
-    phi::distributed::BKCLCommContext* comm_ctx = nullptr;
-    XPUStream stream = nullptr;
-    const auto& comm_context_manager =
-        phi::distributed::CommContextManager::GetInstance();
-    if (FLAGS_dynamic_static_unified_comm) {
-      PADDLE_ENFORCE_EQ(comm_context_manager.Has(std::to_string(ring_id)),
-                        true,
-                        common::errors::InvalidArgument(
-                            "You choose to use new communication library by "
-                            "setting environment "
-                            "variable FLAGS_dynamic_static_unified_comm True. "
-                            "But ring_id(%d) is "
-                            "not found in comm_context_manager.",
-                            std::to_string(ring_id)));
-      comm_ctx = static_cast<phi::distributed::BKCLCommContext*>(
-          comm_context_manager.Get(std::to_string(ring_id)));
-      PADDLE_ENFORCE_NE(comm_ctx,
-                        nullptr,
-                        common::errors::Unavailable(
-                            "BKCLCommContext is nullptr, collective op should "
-                            "has ring_id attr."));
-      stream = comm_ctx->GetStream();
-      VLOG(3) << "new comm_context_manager has rid " << ring_id;
-    } else {  // old comm_context
-      comm = paddle::platform::BKCLCommContext::Instance().Get(ring_id, place);
-      stream = comm->stream();
-      VLOG(3) << "old BKCLCommContext has rid " << ring_id;
-    }
-    if (comm_ctx) {
-      comm_ctx->Broadcast(out, *x, root, stream);
-    } else {
-      void* send_recv_buffer = nullptr;
-      if (root == comm->rank()) {
-        send_recv_buffer =
-            reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
-        PADDLE_ENFORCE_XPU_SUCCESS(bkcl_broadcast(comm->comm(),
-                                                  send_recv_buffer,
-                                                  send_recv_buffer,
-                                                  numel,
-                                                  dtype,
-                                                  root,
-                                                  stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. sent "
-                << x->numel();
-        if (out != x) {
-          framework::TensorCopy(*static_cast<const phi::DenseTensor*>(x),
-                                place,
-                                *phi::DeviceContextPool::Instance().Get(place),
-                                static_cast<phi::DenseTensor*>(out));
-        }
-      } else {
-        auto& dev_ctx = ctx.template device_context<phi::XPUContext>();
-        dev_ctx.template Alloc<T>(out);
-        send_recv_buffer = out->data<T>();
-        PADDLE_ENFORCE_XPU_SUCCESS(bkcl_broadcast(comm->comm(),
-                                                  send_recv_buffer,
-                                                  send_recv_buffer,
-                                                  numel,
-                                                  dtype,
-                                                  root,
-                                                  stream));
-        VLOG(3) << "rank " << comm->rank() << " invoke Bcast. received "
-                << phi::product(out->dims());
-      }
-    }
-    out->Resize(x->dims());
-    out->set_lod(x->lod());
-#else
-    PADDLE_THROW(common::errors::PreconditionNotMet(
-        "PaddlePaddle should be compiled with XPU and BKCL."));
-#endif
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-PD_REGISTER_STRUCT_KERNEL(c_broadcast,
-                          XPU,
-                          ALL_LAYOUT,
-                          ops::CBroadcastOpXPUKernel,
-                          float,
-                          double,
-                          phi::dtype::float16,
-                          int,
-                          int64_t) {}