Rework to avoid dangling references to OpSpec instances (NVIDIA#3472)
Signed-off-by: Joaquin Anton <[email protected]>
jantonguirao authored Nov 8, 2021
1 parent 701f263 commit 2c89e49
Showing 32 changed files with 193 additions and 177 deletions.
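
The one-line summary is terse; the hazard it fixes is a class holding a `const OpSpec &` member bound to an object whose lifetime it does not control. Below is a minimal sketch of the problem and of the per-call style the commit moves attr helpers to; `Spec`, `HelperByRef`, and `HelperPerCall` are hypothetical stand-ins, not DALI types:

```cpp
#include <iostream>
#include <memory>
#include <string>

// Hypothetical stand-ins; not DALI's real OpSpec or operator classes.
struct Spec {
  std::string name;
};

// The hazardous pattern this commit removes: a helper that stores a
// reference to a Spec whose lifetime it does not control.
class HelperByRef {
 public:
  explicit HelperByRef(const Spec &spec) : spec_(spec) {}
  const std::string &Name() const { return spec_.name; }

 private:
  const Spec &spec_;  // dangles as soon as the referenced Spec dies
};

std::unique_ptr<HelperByRef> MakeDangling() {
  Spec temp{"erase"};                          // local, dies at return
  return std::make_unique<HelperByRef>(temp);  // stored reference outlives it
}

// The per-call style the commit moves attr helpers to: no stored spec,
// hence no lifetime coupling at all.
class HelperPerCall {
 public:
  void Process(const Spec &spec) { std::cout << spec.name << "\n"; }
};

int main() {
  auto bad = MakeDangling();
  // bad->Name() here would read a destroyed Spec: undefined behavior.
  HelperPerCall good;
  Spec spec{"erase"};
  good.Process(spec);  // spec is guaranteed alive for the call's duration
  return 0;
}
```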
4 changes: 2 additions & 2 deletions dali/operators/decoder/host/fused/host_decoder_crop.h

@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@ class HostDecoderCrop : public HostDecoder, protected CropAttr {
   DISABLE_COPY_MOVE_ASSIGN(HostDecoderCrop);

   void inline SetupSharedSampleParams(SampleWorkspace &ws) override {
-    CropAttr::ProcessArguments(ws);
+    CropAttr::ProcessArguments(spec_, ws);
   }

  protected:
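
This header and the next three decoder headers make the same mechanical change: `CropAttr`/`SliceAttr` no longer cache a spec, so each call site passes the operator's own `spec_` member, whose lifetime the operator controls. A minimal sketch of the resulting call-site pattern, with hypothetical simplified types:

```cpp
#include <iostream>
#include <string>
#include <utility>

// Hypothetical simplified stand-ins, not the real DALI types.
struct OpSpec { std::string op_name; };
struct Workspace { int batch_size = 4; };

// After the rework the attr helper keeps no spec; it receives one per call.
struct CropAttr {
  void ProcessArguments(const OpSpec &spec, const Workspace &ws) {
    std::cout << spec.op_name << ": batch size " << ws.batch_size << "\n";
  }
};

class HostDecoderCrop : protected CropAttr {
 public:
  explicit HostDecoderCrop(OpSpec spec) : spec_(std::move(spec)) {}

  void SetupSharedSampleParams(Workspace &ws) {
    // spec_ is the operator's own member, so it trivially outlives the call.
    CropAttr::ProcessArguments(spec_, ws);
  }

 private:
  OpSpec spec_;  // owned by the operator for its whole lifetime
};

int main() {
  HostDecoderCrop op{OpSpec{"ImageDecoderCrop"}};
  Workspace ws;
  op.SetupSharedSampleParams(ws);
  return 0;
}
```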
4 changes: 2 additions & 2 deletions dali/operators/decoder/host/fused/host_decoder_slice.h

@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ class HostDecoderSlice : public HostDecoder {

  protected:
   inline void RunImpl(HostWorkspace &ws) override {
-    slice_attr_.ProcessArguments<CPUBackend>(ws);
+    slice_attr_.ProcessArguments<CPUBackend>(spec_, ws);
     Operator<CPUBackend>::RunImpl(ws);
   }
4 changes: 2 additions & 2 deletions dali/operators/decoder/nvjpeg/fused/nvjpeg_decoder_crop.h

@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ class nvJPEGDecoderCrop : public nvJPEGDecoder, protected CropAttr {
   }

   void SetupSharedSampleParams(MixedWorkspace &ws) override {
-    CropAttr::ProcessArguments(ws);
+    CropAttr::ProcessArguments(spec_, ws);
   }
 };
4 changes: 2 additions & 2 deletions dali/operators/decoder/nvjpeg/fused/nvjpeg_decoder_slice.h

@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@ class nvJPEGDecoderSlice : public nvJPEGDecoder {
  protected:
   using OperatorBase::Run;
   void Run(MixedWorkspace &ws) override {
-    slice_attr_.ProcessArguments<MixedBackend>(ws);
+    slice_attr_.ProcessArguments<MixedBackend>(spec_, ws);
     nvJPEGDecoder::Run(ws);
   }
10 changes: 7 additions & 3 deletions dali/operators/generic/erase/erase.cc

@@ -163,13 +163,17 @@ class EraseImplCpu : public OpImplBase<CPUBackend> {
  public:
   using EraseKernel = kernels::EraseCpu<T, Dims>;

-  explicit EraseImplCpu(const OpSpec &spec) : spec_(spec) {}
+  /**
+   * @param spec Pointer to a persistent OpSpec object,
+   *             which is guaranteed to be alive for the entire lifetime of this object
+   */
+  explicit EraseImplCpu(const OpSpec *spec) : spec_(*spec) {}

   bool SetupImpl(std::vector<OutputDesc> &output_desc, const workspace_t<CPUBackend> &ws) override;
   void RunImpl(workspace_t<CPUBackend> &ws) override;

  private:
-  OpSpec spec_;
+  const OpSpec &spec_;
   std::vector<int> axes_;
   std::vector<kernels::EraseArgs<T, Dims>> args_;
   kernels::KernelManager kmgr_;
@@ -224,7 +228,7 @@ bool Erase<CPUBackend>::SetupImpl(std::vector<OutputDesc> &output_desc,
   auto in_shape = input.shape();
   TYPE_SWITCH(input.type(), type2id, T, ERASE_SUPPORTED_TYPES, (
     VALUE_SWITCH(in_shape.sample_dim(), Dims, ERASE_SUPPORTED_NDIMS, (
-      impl_ = std::make_unique<EraseImplCpu<T, Dims>>(spec_);
+      impl_ = std::make_unique<EraseImplCpu<T, Dims>>(&spec_);
     ), DALI_FAIL(make_string("Unsupported number of dimensions ", in_shape.size())));  // NOLINT
   ), DALI_FAIL(make_string("Unsupported data type: ", input.type())));  // NOLINT
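
Where an implementation object still needs the spec after construction, the commit keeps a reference but switches the constructor parameter from `const OpSpec &` to `const OpSpec *` and documents the lifetime contract; the pointer forces an explicit `&spec_` at the call site, making the retention visible. A minimal sketch of the pattern under those assumptions (hypothetical simplified types, not the real DALI classes):

```cpp
#include <memory>
#include <string>
#include <utility>

struct OpSpec { std::string op_name; };  // hypothetical stand-in

class EraseImpl {
 public:
  // Contract (mirroring the commit's doc comment): *spec must stay alive
  // for this object's entire lifetime. Taking a pointer instead of a
  // reference makes the retention visible at the call site as `&spec_`.
  explicit EraseImpl(const OpSpec *spec) : spec_(*spec) {}

  const std::string &Name() const { return spec_.op_name; }

 private:
  const OpSpec &spec_;  // non-owning reference, not a copy
};

class EraseOp {
 public:
  explicit EraseOp(OpSpec spec) : spec_(std::move(spec)) {}

  void Setup() {
    // The operator owns both spec_ and impl_, and impl_ (declared later)
    // is destroyed first, so the lifetime contract holds by construction.
    impl_ = std::make_unique<EraseImpl>(&spec_);
  }

 private:
  OpSpec spec_;
  std::unique_ptr<EraseImpl> impl_;
};

int main() {
  EraseOp op{OpSpec{"Erase"}};
  op.Setup();
  return 0;
}
```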
12 changes: 8 additions & 4 deletions dali/operators/generic/erase/erase.cu

@@ -42,7 +42,11 @@ class EraseImplGpu : public OpImplBase<GPUBackend> {
  public:
   using EraseKernel = kernels::EraseGpu<T, Dims, channel_dim>;

-  explicit EraseImplGpu(const OpSpec &spec) : spec_(spec) {
+  /**
+   * @param spec Pointer to a persistent OpSpec object,
+   *             which is guaranteed to be alive for the entire lifetime of this object
+   */
+  explicit EraseImplGpu(const OpSpec *spec) : spec_(*spec) {
     kmgr_.Resize<EraseKernel>(1, 1);
   }

@@ -117,11 +121,11 @@ bool Erase<GPUBackend>::SetupImpl(std::vector<OutputDesc> &output_desc,
   TYPE_SWITCH(input.type(), type2id, T, ERASE_SUPPORTED_TYPES, (
     VALUE_SWITCH(in_shape.sample_dim(), Dims, ERASE_SUPPORTED_NDIMS, (
       if (channel_dim == -1)
-        impl_ = std::make_unique<EraseImplGpu<T, Dims, -1>>(spec_);
+        impl_ = std::make_unique<EraseImplGpu<T, Dims, -1>>(&spec_);
       else if (channel_dim == 0)
-        impl_ = std::make_unique<EraseImplGpu<T, Dims, 0>>(spec_);
+        impl_ = std::make_unique<EraseImplGpu<T, Dims, 0>>(&spec_);
       else if (channel_dim == Dims-1)
-        impl_ = std::make_unique<EraseImplGpu<T, Dims, Dims-1>>(spec_);
+        impl_ = std::make_unique<EraseImplGpu<T, Dims, Dims-1>>(&spec_);
       else
         DALI_FAIL("Unsupported layout. Only 'no channel', "
                   "'channel first' and 'channel last' layouts are supported.");
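
Independent of the lifetime fix, the call sites above rely on mapping a runtime channel position onto one of three compile-time instantiations. A hedged sketch of that dispatch shape in plain C++; the real code uses DALI's TYPE_SWITCH/VALUE_SWITCH macros, and `EraseImplGpu` here is a trivial stand-in:

```cpp
#include <iostream>
#include <memory>
#include <stdexcept>

// A sketch of mapping a runtime channel position onto one of three
// compile-time instantiations, as erase.cu does via VALUE_SWITCH.
struct ImplBase {
  virtual ~ImplBase() = default;
  virtual void Run() = 0;
};

template <int channel_dim>
struct EraseImplGpu : ImplBase {  // trivial stand-in for the real impl
  void Run() override { std::cout << "channel_dim = " << channel_dim << "\n"; }
};

template <int Dims>
std::unique_ptr<ImplBase> MakeImpl(int channel_dim) {
  if (channel_dim == -1)
    return std::make_unique<EraseImplGpu<-1>>();        // no channel axis
  else if (channel_dim == 0)
    return std::make_unique<EraseImplGpu<0>>();         // channel-first
  else if (channel_dim == Dims - 1)
    return std::make_unique<EraseImplGpu<Dims - 1>>();  // channel-last
  else
    throw std::runtime_error("unsupported layout");
}

int main() {
  auto impl = MakeImpl<3>(2);  // e.g. HWC: channel is last of 3 dims
  impl->Run();                 // prints "channel_dim = 2"
  return 0;
}
```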
6 changes: 3 additions & 3 deletions dali/operators/generic/slice/slice.h

@@ -1,4 +1,4 @@
-// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,8 +34,8 @@ class Slice : public SliceBase<Backend> {
     , slice_attr_(spec) {}

  protected:
-  void ProcessCroppingAttrs(const workspace_t<Backend> &ws) override {
-    slice_attr_.ProcessArguments<Backend>(ws);
+  void ProcessCroppingAttrs(const OpSpec &spec, const workspace_t<Backend> &ws) override {
+    slice_attr_.ProcessArguments<Backend>(spec, ws);
   }

   const CropWindowGenerator& GetCropWindowGenerator(std::size_t data_idx) const override {
43 changes: 19 additions & 24 deletions dali/operators/generic/slice/slice_attr.h

@@ -52,8 +52,7 @@ class NamedSliceAttr {
                  const char* rel_shape_name = "rel_shape",
                  const char* axes_arg_name = "axes",
                  const char* axis_names_arg_name = "axis_names")
-      : spec_(spec),
-        start_(start_name, spec),
+      : start_(start_name, spec),
         rel_start_(rel_start_name, spec),
         end_(end_name, spec),
         rel_end_(rel_end_name, spec),
@@ -78,7 +77,8 @@
   }

   template <typename Backend>
-  bool ProcessArguments(const workspace_t<Backend> &ws, int curr_batch_size = -1, int ndim = -1) {
+  bool ProcessArguments(const OpSpec& spec, const workspace_t<Backend>& ws,
+                        int curr_batch_size = -1, int ndim = -1) {
     if (curr_batch_size < 0)
       curr_batch_size = ws.GetInputBatchSize(0);
     if (ndim < 0)
@@ -91,18 +91,18 @@
     }

     if (start_.IsDefined())
-      start_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      start_.Acquire(spec, ws, curr_batch_size, args_shape);
     else if (rel_start_.IsDefined())
-      rel_start_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      rel_start_.Acquire(spec, ws, curr_batch_size, args_shape);

     if (end_.IsDefined())
-      end_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      end_.Acquire(spec, ws, curr_batch_size, args_shape);
     else if (rel_end_.IsDefined())
-      rel_end_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      rel_end_.Acquire(spec, ws, curr_batch_size, args_shape);
     else if (shape_.IsDefined())
-      shape_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      shape_.Acquire(spec, ws, curr_batch_size, args_shape);
     else if (rel_shape_.IsDefined())
-      rel_shape_.Acquire(spec_, ws, curr_batch_size, args_shape);
+      rel_shape_.Acquire(spec, ws, curr_batch_size, args_shape);

     for (int data_idx = 0; data_idx < curr_batch_size; data_idx++) {
       ProcessNamedArgs(data_idx);
@@ -189,8 +189,6 @@
   }

  private:
-  const OpSpec &spec_;
-
   std::vector<int> axes_;
   TensorLayout axis_names_;

@@ -212,7 +210,7 @@

 class PositionalSliceAttr {
  public:
-  explicit inline PositionalSliceAttr(const OpSpec& spec) : spec_(spec) {
+  explicit inline PositionalSliceAttr(const OpSpec& spec) {
     normalized_anchor_ = spec.GetArgument<bool>("normalized_anchor");
     normalized_shape_ = spec.GetArgument<bool>("normalized_shape");
     int max_batch_sz = spec.GetArgument<int>("max_batch_size");
@@ -221,7 +219,7 @@
   }

   template <typename Backend>
-  bool ProcessArguments(const workspace_t<Backend> &ws) {
+  bool ProcessArguments(const OpSpec &spec, const workspace_t<Backend> &ws) {
     auto curr_batch_size = ws.GetInputBatchSize(0);
     int ndim = ws.GetInputDim(0);

@@ -231,7 +229,7 @@
                  static_cast<int>(axis_names_.size()));
     }

-    if (ws.NumInput() != (spec_.GetSchema().MinNumInput() + 2))
+    if (ws.NumInput() != (spec.GetSchema().MinNumInput() + 2))
       return false;

     const auto &crop_anchor = ws.template Input<CPUBackend>(1);
@@ -332,7 +330,6 @@
   }

  private:
-  const OpSpec &spec_;
   bool normalized_anchor_, normalized_shape_;
   std::vector<int> axes_;
   TensorLayout axis_names_;
@@ -343,25 +340,24 @@
 class SliceAttr {
  public:
   explicit inline SliceAttr(const OpSpec &spec)
-      : spec_(spec),
-        named_slice_attr_(spec),
+      : named_slice_attr_(spec),
         pos_slice_attr_(spec) {
   }

   template <typename Backend>
-  void ProcessArguments(const workspace_t<Backend> &ws) {
-    use_named_args_ = named_slice_attr_.template ProcessArguments<Backend>(ws);
+  void ProcessArguments(const OpSpec &spec, const workspace_t<Backend> &ws) {
+    use_named_args_ = named_slice_attr_.template ProcessArguments<Backend>(spec, ws);
     if (use_named_args_) {
-      if (spec_.HasArgument("normalized_anchor") || spec_.HasArgument("normalized_shape")) {
+      if (spec.HasArgument("normalized_anchor") || spec.HasArgument("normalized_shape")) {
         DALI_WARN(
             "Warning: ``normalized_anchor``/``normalized_shape`` is only relevant "
             "when using positional slice arguments");
       }
-      DALI_ENFORCE(ws.NumInput() == spec_.GetSchema().MinNumInput(),
+      DALI_ENFORCE(ws.NumInput() == spec.GetSchema().MinNumInput(),
                    "Named arguments start/end/shape are not compatible with positional"
                    " anchor and shape inputs");
-    } else if (ws.NumInput() == (spec_.GetSchema().MinNumInput() + 2)) {
-      bool processed_pos_args = pos_slice_attr_.template ProcessArguments<Backend>(ws);
+    } else if (ws.NumInput() == (spec.GetSchema().MinNumInput() + 2)) {
+      bool processed_pos_args = pos_slice_attr_.template ProcessArguments<Backend>(spec, ws);
       DALI_ENFORCE(processed_pos_args, "Failed to process positional arguments (start, shape)");
     } else {
       DALI_FAIL(
@@ -379,7 +375,6 @@
   }

  private:
-  const OpSpec &spec_;
   NamedSliceAttr named_slice_attr_;
   PositionalSliceAttr pos_slice_attr_;
   bool use_named_args_ = false;
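
`SliceAttr::ProcessArguments` now validates against the spec it receives: named start/end/shape arguments require exactly the schema's minimum input count, while positional anchor/shape mean exactly two extra inputs. A minimal sketch of that decision logic, with hypothetical simplified types:

```cpp
#include <iostream>
#include <stdexcept>

// Hypothetical simplification of SliceAttr::ProcessArguments' dispatch.
struct Schema {
  int min_num_input;
  int MinNumInput() const { return min_num_input; }
};
struct OpSpec {
  Schema schema;
  bool has_named_args;  // were start/end/shape given as named arguments?
  const Schema &GetSchema() const { return schema; }
};
struct Workspace {
  int num_input;
  int NumInput() const { return num_input; }
};

void ProcessArguments(const OpSpec &spec, const Workspace &ws) {
  if (spec.has_named_args) {
    // Named start/end/shape exclude the positional anchor/shape inputs.
    if (ws.NumInput() != spec.GetSchema().MinNumInput())
      throw std::invalid_argument(
          "named slice arguments are not compatible with positional inputs");
    std::cout << "using named arguments\n";
  } else if (ws.NumInput() == spec.GetSchema().MinNumInput() + 2) {
    // Exactly two extra inputs: the positional anchor and shape.
    std::cout << "using positional anchor/shape inputs\n";
  } else {
    throw std::invalid_argument("slice arguments were not specified");
  }
}

int main() {
  OpSpec spec{{1}, false};
  ProcessArguments(spec, Workspace{3});  // 1 data input + anchor + shape
  return 0;
}
```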
4 changes: 2 additions & 2 deletions dali/operators/generic/slice/slice_base.h

@@ -54,7 +54,7 @@ class SliceBase : public Operator<Backend> {
   template <typename OutputType, int Dims>
   void FillArgs(std::vector<kernels::SliceArgs<OutputType, Dims>>& slice_args,
                 const workspace_t<Backend> &ws) {
-    this->ProcessCroppingAttrs(ws);
+    this->ProcessCroppingAttrs(spec_, ws);
     const auto &input = ws.template Input<Backend>(0);
     auto in_shape = input.shape();
     int nsamples = in_shape.num_samples();
@@ -79,7 +79,7 @@
   /**
    * @brief Implementation specific (Crop, Slice, ...)
    */
-  virtual void ProcessCroppingAttrs(const workspace_t<Backend> &ws) = 0;
+  virtual void ProcessCroppingAttrs(const OpSpec &spec, const workspace_t<Backend> &ws) = 0;
   virtual const CropWindowGenerator& GetCropWindowGenerator(std::size_t data_idx) const = 0;

   bool CanInferOutputs() const override {
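
`FillArgs` is a template method: `SliceBase` owns `spec_` and threads it into the pure virtual `ProcessCroppingAttrs`, so neither subclasses nor their attr helpers need to store a spec at all. A minimal sketch of the shape (hypothetical simplified types):

```cpp
#include <iostream>
#include <string>
#include <utility>

struct OpSpec { std::string op_name; };  // hypothetical stand-ins
struct Workspace {};

class SliceBase {
 public:
  explicit SliceBase(OpSpec spec) : spec_(std::move(spec)) {}
  virtual ~SliceBase() = default;

  void FillArgs(const Workspace &ws) {
    // Template method: the base owns spec_ and hands it down, so the
    // override never needs its own stored copy or reference.
    ProcessCroppingAttrs(spec_, ws);
  }

 protected:
  virtual void ProcessCroppingAttrs(const OpSpec &spec,
                                    const Workspace &ws) = 0;
  OpSpec spec_;
};

class Slice : public SliceBase {
 public:
  using SliceBase::SliceBase;

 protected:
  void ProcessCroppingAttrs(const OpSpec &spec, const Workspace &) override {
    std::cout << "processing slice arguments for " << spec.op_name << "\n";
  }
};

int main() {
  Slice op{OpSpec{"Slice"}};
  op.FillArgs(Workspace{});
  return 0;
}
```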
14 changes: 9 additions & 5 deletions dali/operators/image/convolution/gaussian_blur.cc

@@ -121,8 +121,12 @@ class GaussianBlurOpCpu : public OpImplBase<CPUBackend> {
   using Kernel = kernels::SeparableConvolutionCpu<Out, In, float, axes, has_channels>;
   static constexpr int ndim = Kernel::ndim;

-  explicit GaussianBlurOpCpu(const OpSpec& spec, const DimDesc& dim_desc)
-      : spec_(spec), dim_desc_(dim_desc) {}
+  /**
+   * @param spec Pointer to a persistent OpSpec object,
+   *             which is guaranteed to be alive for the entire lifetime of this object
+   */
+  explicit GaussianBlurOpCpu(const OpSpec* spec, const DimDesc& dim_desc)
+      : spec_(*spec), dim_desc_(dim_desc) {}

   bool SetupImpl(std::vector<OutputDesc>& output_desc, const workspace_t<CPUBackend>& ws) override {
     const auto& input = ws.template Input<CPUBackend>(0);
@@ -188,7 +192,7 @@ class GaussianBlurOpCpu : public OpImplBase<CPUBackend> {
   }

  private:
-  OpSpec spec_;
+  const OpSpec &spec_;
   DimDesc dim_desc_;

   kernels::KernelManager kmgr_;
@@ -217,9 +221,9 @@ bool GaussianBlur<CPUBackend>::SetupImpl(std::vector<OutputDesc>& output_desc,
     VALUE_SWITCH(static_cast<int>(dim_desc.has_channels), HAS_CHANNELS, (0, 1), (
       constexpr bool has_ch = HAS_CHANNELS;
       if (dtype_ == input.type()) {
-        impl_ = std::make_unique<GaussianBlurOpCpu<In, In, AXES, has_ch>>(spec_, dim_desc);
+        impl_ = std::make_unique<GaussianBlurOpCpu<In, In, AXES, has_ch>>(&spec_, dim_desc);
       } else {
-        impl_ = std::make_unique<GaussianBlurOpCpu<float, In, AXES, has_ch>>(spec_, dim_desc);
+        impl_ = std::make_unique<GaussianBlurOpCpu<float, In, AXES, has_ch>>(&spec_, dim_desc);
       }
     ), ());  // NOLINT, no other possible conversion
   ), DALI_FAIL("Axis count out of supported range."));  // NOLINT
46 changes: 23 additions & 23 deletions dali/operators/image/convolution/gaussian_blur.cu

@@ -36,37 +36,37 @@ namespace gaussian_blur {
 // Functions below are explicitly instantiated in separate compilation unit to allow
 // for parallel compitlation of underlying kernels.

-extern template op_impl_uptr GetGaussianBlurGpuImpl<uint8_t, uint8_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint8_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<uint8_t, uint8_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint8_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<int8_t, int8_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int8_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<int8_t, int8_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int8_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<uint16_t, uint16_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint16_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<uint16_t, uint16_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint16_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<int16_t, int16_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int16_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<int16_t, int16_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int16_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<uint32_t, uint32_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint32_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<uint32_t, uint32_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint32_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<int32_t, int32_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int32_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<int32_t, int32_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int32_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<uint64_t, uint64_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint64_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<uint64_t, uint64_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, uint64_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<int64_t, int64_t>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int64_t>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<int64_t, int64_t>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, int64_t>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<float16, float16>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, float16>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float16, float16>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, float16>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, float>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, float>(const OpSpec*, DimDesc);

-extern template op_impl_uptr GetGaussianBlurGpuImpl<double, double>(const OpSpec&, DimDesc);
-extern template op_impl_uptr GetGaussianBlurGpuImpl<float, double>(const OpSpec&, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<double, double>(const OpSpec*, DimDesc);
+extern template op_impl_uptr GetGaussianBlurGpuImpl<float, double>(const OpSpec*, DimDesc);

 }  // namespace gaussian_blur

@@ -83,9 +83,9 @@ bool GaussianBlur<GPUBackend>::SetupImpl(std::vector<OutputDesc>& output_desc,
   // clang-format off
   TYPE_SWITCH(input.type(), type2id, In, GAUSSIAN_BLUR_GPU_SUPPORTED_TYPES, (
     if (dtype_ == input.type()) {
-      impl_ = GetGaussianBlurGpuImpl<In, In>(spec_, dim_desc);
+      impl_ = GetGaussianBlurGpuImpl<In, In>(&spec_, dim_desc);
     } else {
-      impl_ = GetGaussianBlurGpuImpl<float, In>(spec_, dim_desc);
+      impl_ = GetGaussianBlurGpuImpl<float, In>(&spec_, dim_desc);
     }
   ), DALI_FAIL(make_string("Unsupported data type: ", input.type())));  // NOLINT
   // clang-format on
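
The `extern template` block at the top of this file declares that each `GetGaussianBlurGpuImpl` specialization is explicitly instantiated in some other translation unit, so this file neither re-instantiates them nor needs the template's definition, and the per-type units can compile in parallel. A minimal sketch of the mechanism with hypothetical names; in real code the three parts live in separate files:

```cpp
// Conceptually three files, shown in one for brevity.

// ---- impl.h: what every consumer sees ----
template <typename Out, typename In>
int GetImpl(const In *data, int n);  // primary template declaration

// "This specialization exists somewhere else; do not instantiate it
// here." Consumers compile without seeing the template's definition.
extern template int GetImpl<float, float>(const float *, int);

// ---- impl_float.cc: one translation unit per type group ----
template <typename Out, typename In>
int GetImpl(const In *, int n) {
  return n;  // stand-in for the real kernel setup
}

// Explicit instantiation definition: the single place where
// GetImpl<float, float> is actually compiled.
template int GetImpl<float, float>(const float *, int);

// ---- consumer.cc ----
int main() {
  float buf[4] = {};
  return GetImpl<float, float>(buf, 4) == 4 ? 0 : 1;
}
```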