Commit ab4d98e

remove warnings

lbin committed Dec 2, 2020
1 parent 711fe28 commit ab4d98e

Showing 6 changed files with 108 additions and 105 deletions.
3 changes: 3 additions & 0 deletions setup.py
@@ -2,6 +2,7 @@
 
 import glob
 import os
+import sys
 
 import torch
 from setuptools import find_packages, setup
@@ -37,6 +38,8 @@ def get_extensions():
     else:
         # raise NotImplementedError('Cuda is not available')
         pass
+
+    extra_compile_args['cxx'].append('-fopenmp')
 
     sources = [os.path.join(extensions_dir, s) for s in sources]
     include_dirs = [extensions_dir]
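
Note: the new -fopenmp flag makes the C++ compiler honor OpenMP pragmas in the
CPU kernels; without it, any "#pragma omp" directive is silently ignored and
the loop runs serially. A minimal self-contained sketch of the kind of loop
this flag enables (illustrative only, not code from this repository):

// openmp_sketch.cpp -- build with: g++ -fopenmp openmp_sketch.cpp
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> data(1 << 20, 1.0f);
    float sum = 0.0f;
    // Parallel reduction across threads; requires -fopenmp to take effect.
    #pragma omp parallel for reduction(+ : sum)
    for (long i = 0; i < (long)data.size(); ++i) {
        sum += data[i];
    }
    std::printf("sum = %f\n", sum);  // expected: 1048576.0
    return 0;
}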
20 changes: 10 additions & 10 deletions src/cpu/dcn_v2_cpu.cpp
@@ -36,11 +36,11 @@ dcn_v2_cpu_forward(const at::Tensor &input,
                    const int deformable_group)
 {
     // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
-    /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
-    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
-    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
-    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/
+    /*AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
+    AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
+    AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
+    AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");*/
 
     const int batch = input.size(0);
     const int channels = input.size(1);
@@ -126,11 +126,11 @@ std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,
     THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous");
     THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous");
 
-    /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
-    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
-    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
-    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/
+    /*AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
+    AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
+    AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
+    AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");*/
 
     const int batch = input.size(0);
     const int channels = input.size(1);
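
Note: these checks are commented out in the CPU build, but the commit still
migrates them from tensor.type().is_cuda(), which emits a deprecation warning
when compiled against recent PyTorch versions, to tensor.is_cuda(). A minimal
sketch of the pattern (illustrative; the function name is an assumption):

// device_check_sketch.cpp -- illustrative only, not code from this commit.
#include <ATen/ATen.h>

void require_cuda(const at::Tensor &input) {
    // Deprecated: routes through the Type object and triggers the warning.
    // AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");

    // Current: query the tensor directly.
    AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
}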
46 changes: 23 additions & 23 deletions src/cpu/dcn_v2_psroi_pooling_cpu.cpp
@@ -288,11 +288,11 @@ dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
                                  const int sample_per_part,
                                  const float trans_std)
 {
-    /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(bbox.type().is_cuda(), "rois must be a CUDA tensor");
-    AT_ASSERTM(trans.type().is_cuda(), "trans must be a CUDA tensor");*/
+    /*AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(bbox.is_cuda(), "rois must be a CUDA tensor");
+    AT_ASSERTM(trans.is_cuda(), "trans must be a CUDA tensor");*/
 
-    const int batch = input.size(0);
+    // const int batch = input.size(0);
     const int channels = input.size(1);
     const int height = input.size(2);
     const int width = input.size(3);
@@ -321,17 +321,17 @@ dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
     /*dim3 grid(std::min(THCCeilDiv(out_size, 512L), 4096L));
     dim3 block(512);*/
 
-    AT_DISPATCH_FLOATING_TYPES(input.type(), "dcn_v2_psroi_pooling_cpu_forward", [&] {
+    AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "dcn_v2_psroi_pooling_cpu_forward", [&] {
         DeformablePSROIPoolForwardKernelCpu<scalar_t>(
             out_size,
-            input.contiguous().data<scalar_t>(),
+            input.contiguous().data_ptr<scalar_t>(),
             spatial_scale,
             channels,
             height, width,
             pooled_height,
             pooled_width,
-            bbox.contiguous().data<scalar_t>(),
-            trans.contiguous().data<scalar_t>(),
+            bbox.contiguous().data_ptr<scalar_t>(),
+            trans.contiguous().data_ptr<scalar_t>(),
             no_trans,
             trans_std,
             sample_per_part,
@@ -340,8 +340,8 @@ dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
             part_size,
             num_classes,
             channels_each_class,
-            out.data<scalar_t>(),
-            top_count.data<scalar_t>());
+            out.data_ptr<scalar_t>(),
+            top_count.data_ptr<scalar_t>());
     });
     //THCudaCheck(cudaGetLastError());
     return std::make_tuple(out, top_count);
@@ -362,11 +362,11 @@ dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
                                   const int sample_per_part,
                                   const float trans_std)
 {
-    /*AT_ASSERTM(out_grad.type().is_cuda(), "out_grad must be a CUDA tensor");
-    AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(bbox.type().is_cuda(), "bbox must be a CUDA tensor");
-    AT_ASSERTM(trans.type().is_cuda(), "trans must be a CUDA tensor");
-    AT_ASSERTM(top_count.type().is_cuda(), "top_count must be a CUDA tensor");*/
+    /*AT_ASSERTM(out_grad.is_cuda(), "out_grad must be a CUDA tensor");
+    AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(bbox.is_cuda(), "bbox must be a CUDA tensor");
+    AT_ASSERTM(trans.is_cuda(), "trans must be a CUDA tensor");
+    AT_ASSERTM(top_count.is_cuda(), "top_count must be a CUDA tensor");*/
 
     const int batch = input.size(0);
     const int channels = input.size(1);
@@ -395,11 +395,11 @@ dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
     dim3 block(512);
     cudaStream_t stream = at::cuda::getCurrentCUDAStream();*/
 
-    AT_DISPATCH_FLOATING_TYPES(out_grad.type(), "dcn_v2_psroi_pooling_cpu_backward", [&] {
+    AT_DISPATCH_FLOATING_TYPES(out_grad.scalar_type(), "dcn_v2_psroi_pooling_cpu_backward", [&] {
         DeformablePSROIPoolBackwardAccKernelCpu<scalar_t>(
             out_size,
-            out_grad.contiguous().data<scalar_t>(),
-            top_count.contiguous().data<scalar_t>(),
+            out_grad.contiguous().data_ptr<scalar_t>(),
+            top_count.contiguous().data_ptr<scalar_t>(),
             num_bbox,
             spatial_scale,
             channels,
@@ -408,11 +408,11 @@ dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
             pooled_height,
             pooled_width,
             output_dim,
-            input_grad.contiguous().data<scalar_t>(),
-            trans_grad.contiguous().data<scalar_t>(),
-            input.contiguous().data<scalar_t>(),
-            bbox.contiguous().data<scalar_t>(),
-            trans.contiguous().data<scalar_t>(),
+            input_grad.contiguous().data_ptr<scalar_t>(),
+            trans_grad.contiguous().data_ptr<scalar_t>(),
+            input.contiguous().data_ptr<scalar_t>(),
+            bbox.contiguous().data_ptr<scalar_t>(),
+            trans.contiguous().data_ptr<scalar_t>(),
             no_trans,
             trans_std,
             sample_per_part,
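
Note: the two remaining warning sources fixed in this file are
AT_DISPATCH_FLOATING_TYPES, which now takes a c10::ScalarType from
tensor.scalar_type() instead of the deprecated Type object from tensor.type(),
and Tensor::data<T>(), which is superseded by Tensor::data_ptr<T>(). A minimal
self-contained sketch of the updated pattern (illustrative; the function name
is an assumption):

// dispatch_sketch.cpp -- illustrative only, not code from this commit.
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>

// Doubles every element in-place through a raw pointer, for any float dtype.
void scale_by_two(at::Tensor &t) {
    at::Tensor c = t.contiguous();
    AT_DISPATCH_FLOATING_TYPES(c.scalar_type(), "scale_by_two", [&] {
        // data_ptr<T>() is the non-deprecated raw-pointer accessor.
        scalar_t *p = c.data_ptr<scalar_t>();
        for (int64_t i = 0; i < c.numel(); ++i) {
            p[i] *= static_cast<scalar_t>(2);
        }
    });
    t.copy_(c);  // write back in case contiguous() had to copy
}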
92 changes: 46 additions & 46 deletions src/cuda/dcn_v2_cuda.cu
@@ -57,11 +57,11 @@ dcn_v2_cuda_forward(const at::Tensor &input,
 {
     using scalar_t = float;
     // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask));
-    AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
-    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
-    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
-    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");
+    AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
+    AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
+    AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
+    AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");
 
     const int batch = input.size(0);
     const int channels = input.size(1);
@@ -108,12 +108,12 @@ dcn_v2_cuda_forward(const at::Tensor &input,
                        input_b, output_b,
                        columns_b, ones_b,
                        weight_b, bias_b,
-                       input.data<scalar_t>(),
-                       output.data<scalar_t>(),
-                       columns.data<scalar_t>(),
-                       ones.data<scalar_t>(),
-                       weight.data<scalar_t>(),
-                       bias.data<scalar_t>(),
+                       input.data_ptr<scalar_t>(),
+                       output.data_ptr<scalar_t>(),
+                       columns.data_ptr<scalar_t>(),
+                       ones.data_ptr<scalar_t>(),
+                       weight.data_ptr<scalar_t>(),
+                       bias.data_ptr<scalar_t>(),
                        channels * width * height,
                        channels_out * width_out * height_out,
                        channels * kernel_h * kernel_w * height_out * width_out,
@@ -137,14 +137,14 @@ dcn_v2_cuda_forward(const at::Tensor &input,
                        batch);
 
     modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
-                                     input.data<scalar_t>(),
-                                     offset.data<scalar_t>(),
-                                     mask.data<scalar_t>(),
+                                     input.data_ptr<scalar_t>(),
+                                     offset.data_ptr<scalar_t>(),
+                                     mask.data_ptr<scalar_t>(),
                                      batch, channels, height, width,
                                      height_out, width_out, kernel_h, kernel_w,
                                      pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
                                      deformable_group,
-                                     columns.data<scalar_t>());
+                                     columns.data_ptr<scalar_t>());
 
     long m = channels_out;
     long n = height_out * width_out;
@@ -219,11 +219,11 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
     THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous");
     THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous");
 
-    AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
-    AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
-    AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor");
-    AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor");
-    AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");
+    AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor");
+    AT_ASSERTM(weight.is_cuda(), "weight must be a CUDA tensor");
+    AT_ASSERTM(bias.is_cuda(), "bias must be a CUDA tensor");
+    AT_ASSERTM(offset.is_cuda(), "offset must be a CUDA tensor");
+    AT_ASSERTM(mask.is_cuda(), "mask must be a CUDA tensor");
 
     const int batch = input.size(0);
     const int channels = input.size(1);
@@ -271,68 +271,68 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
         long k = channels_out;
 
         THCudaBlas_Sgemm(state, 'n', 't', n, m, k, 1.0f,
-                         grad_output_n.data<scalar_t>(), n,
-                         weight.data<scalar_t>(), m, 0.0f,
-                         columns.data<scalar_t>(), n);
+                         grad_output_n.data_ptr<scalar_t>(), n,
+                         weight.data_ptr<scalar_t>(), m, 0.0f,
+                         columns.data_ptr<scalar_t>(), n);
 
         // gradient w.r.t. input coordinate data
         modulated_deformable_col2im_coord_cuda(c10::cuda::getCurrentCUDAStream(),
-                                               columns.data<scalar_t>(),
-                                               input_n.data<scalar_t>(),
-                                               offset_n.data<scalar_t>(),
-                                               mask_n.data<scalar_t>(),
+                                               columns.data_ptr<scalar_t>(),
+                                               input_n.data_ptr<scalar_t>(),
+                                               offset_n.data_ptr<scalar_t>(),
+                                               mask_n.data_ptr<scalar_t>(),
                                                1, channels, height, width,
                                                height_out, width_out, kernel_h, kernel_w,
                                                pad_h, pad_w, stride_h, stride_w,
                                                dilation_h, dilation_w, deformable_group,
-                                               grad_offset_n.data<scalar_t>(),
-                                               grad_mask_n.data<scalar_t>());
+                                               grad_offset_n.data_ptr<scalar_t>(),
+                                               grad_mask_n.data_ptr<scalar_t>());
         // gradient w.r.t. input data
         modulated_deformable_col2im_cuda(c10::cuda::getCurrentCUDAStream(),
-                                         columns.data<scalar_t>(),
-                                         offset_n.data<scalar_t>(),
-                                         mask_n.data<scalar_t>(),
+                                         columns.data_ptr<scalar_t>(),
+                                         offset_n.data_ptr<scalar_t>(),
+                                         mask_n.data_ptr<scalar_t>(),
                                          1, channels, height, width,
                                          height_out, width_out, kernel_h, kernel_w,
                                          pad_h, pad_w, stride_h, stride_w,
                                          dilation_h, dilation_w, deformable_group,
-                                         grad_input_n.data<scalar_t>());
+                                         grad_input_n.data_ptr<scalar_t>());
 
         // gradient w.r.t. weight, dWeight should accumulate across the batch and group
         modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
-                                         input_n.data<scalar_t>(),
-                                         offset_n.data<scalar_t>(),
-                                         mask_n.data<scalar_t>(),
+                                         input_n.data_ptr<scalar_t>(),
+                                         offset_n.data_ptr<scalar_t>(),
+                                         mask_n.data_ptr<scalar_t>(),
                                          1, channels, height, width,
                                          height_out, width_out, kernel_h, kernel_w,
                                          pad_h, pad_w, stride_h, stride_w,
                                          dilation_h, dilation_w, deformable_group,
-                                         columns.data<scalar_t>());
+                                         columns.data_ptr<scalar_t>());
 
         long m_ = channels_out;
         long n_ = channels * kernel_h * kernel_w;
        long k_ = height_out * width_out;
 
         THCudaBlas_Sgemm(state, 't', 'n', n_, m_, k_, 1.0f,
-                         columns.data<scalar_t>(), k_,
-                         grad_output_n.data<scalar_t>(), k_, 1.0f,
-                         grad_weight.data<scalar_t>(), n_);
+                         columns.data_ptr<scalar_t>(), k_,
+                         grad_output_n.data_ptr<scalar_t>(), k_, 1.0f,
+                         grad_weight.data_ptr<scalar_t>(), n_);
 
         // gradient w.r.t. bias
         // long m_ = channels_out;
         // long k__ = height_out * width_out;
         // THCudaBlas_Sgemm(state,
         //                  't', 'n',
         //                  k_, m_, 1, 1.0f,
-        //                  grad_output_n.data<scalar_t>(), k_,
-        //                  ones.data<scalar_t>(), 1, 1.0f,
-        //                  grad_bias.data<scalar_t>(), 1);
+        //                  grad_output_n.data_ptr<scalar_t>(), k_,
+        //                  ones.data_ptr<scalar_t>(), 1, 1.0f,
+        //                  grad_bias.data_ptr<scalar_t>(), 1);
         THCudaBlas_Sgemm(state,
                          'N', 'N', 1, m_, k_, 1.0f,
-                         ones.data<scalar_t>(), 1,
-                         grad_output_n.data<scalar_t>(), k_,
+                         ones.data_ptr<scalar_t>(), 1,
+                         grad_output_n.data_ptr<scalar_t>(), k_,
                          1.0f,
-                         grad_bias.data<scalar_t>(), 1);
+                         grad_bias.data_ptr<scalar_t>(), 1);
     }
 
     return {
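
Note: the last GEMM above computes the bias gradient by multiplying each
grad_output_n slice with a vector of ones, which sums the gradient over the
height_out * width_out spatial positions per output channel; the beta of 1.0f
accumulates that sum across the batch loop. The net effect, written with
high-level ATen ops (a sketch with an assumed helper name, not code from this
commit):

// bias_grad_sketch.cpp -- illustrative reference, not code from this commit.
#include <ATen/ATen.h>

// grad_bias[c] = sum over b, h, w of grad_output[b][c][h][w]
at::Tensor bias_grad_reference(const at::Tensor &grad_output) {
    // grad_output: [batch, channels_out, height_out, width_out];
    // summing over batch and both spatial dims leaves one value per channel.
    return grad_output.sum({0, 2, 3});
}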
(The remaining changed files of the 6 are not shown in this capture.)
