Remove redundant zeroing in col2im/im2col (pytorch#87375)

All of the kernels already either start by zeroing the output themselves or are written so that they store a value to every output location, so these `zero_` calls should be redundant.

Pull Request resolved: pytorch#87375
Approved by: https://github.com/albanD
bmedishe · Oct 21, 2022 · 5b7f027
1 parent 4fc72b0
commit 5b7f027
Show file tree

Hide file tree

Showing 4 changed files with 0 additions and 4 deletions.
diff --git a/aten/src/ATen/native/Col2Im.cpp b/aten/src/ATen/native/Col2Im.cpp
@@ -144,7 +144,6 @@ static void col2im_out_cpu_template(
   int64_t n_output_plane = n_input_plane / (kernel_width * kernel_height);
 
   output.resize_({batch_size, n_output_plane, output_height, output_width});
-  output.zero_();
 
   AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf,
       input.scalar_type(), "col2im_out_cpu", [&] {

diff --git a/aten/src/ATen/native/Im2Col.cpp b/aten/src/ATen/native/Im2Col.cpp
@@ -85,7 +85,6 @@ static void im2col_out_cpu_template(
   int64_t output_length = output_height * output_width;
 
   output.resize_({batch_size, n_output_plane, output_length});
-  output.zero_();
 
   AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf,
       input.scalar_type(), "im2col_out_cpu", [&] {

diff --git a/aten/src/ATen/native/cuda/Col2Im.cu b/aten/src/ATen/native/cuda/Col2Im.cu
@@ -101,7 +101,6 @@ void col2im_out_cuda_template(
   int64_t input_batch_stride = input.stride(0);
 
   output.resize_({batch_size, n_output_plane, output_height, output_width});
-  output.zero_();
   int64_t output_batch_stride = output.stride(0);
 
   AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16,

diff --git a/aten/src/ATen/native/cuda/Im2Col.cu b/aten/src/ATen/native/cuda/Im2Col.cu
@@ -102,7 +102,6 @@ static void im2col_out_cuda_template(
   int64_t output_length = output_height * output_width;
 
   output.resize_({batch_size, n_output_plane, output_length});
-  output.zero_();
 
   // Launch kernel
   AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16,