Skip to content

Commit

Permalink
Video reader resize (NVIDIA#2097)
Browse files Browse the repository at this point in the history
Added VideoReaderResize

Signed-off-by: Albert Wolant <[email protected]>
  • Loading branch information
awolant authored Jul 10, 2020
1 parent 29facf8 commit ada6f49
Show file tree
Hide file tree
Showing 14 changed files with 413 additions and 83 deletions.
14 changes: 13 additions & 1 deletion dali/operators/image/resize/resize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@
#include "dali/pipeline/data/views.h"

namespace dali {
namespace detail {
/**
 * @brief Builds the per-axis resampling parameters for a 2D resize.
 *
 * Axis 0 gets `meta.rsz_h` and axis 1 gets `meta.rsz_w` as its output size.
 * Both axes receive the same `min_filter` / `mag_filter` descriptors.
 *
 * @param meta        transform metadata; only `rsz_h` and `rsz_w` are read
 * @param min_filter  descriptor stored in `min_filter` of both axes
 * @param mag_filter  descriptor stored in `mag_filter` of both axes
 * @return the filled-in parameters; fields not listed above keep their default values
 */
kernels::ResamplingParams2D GetResamplingParams(
    const TransformMeta &meta, kernels::FilterDesc min_filter, kernels::FilterDesc mag_filter) {
  kernels::ResamplingParams2D resampling;

  auto &axis_h = resampling[0];
  auto &axis_w = resampling[1];

  axis_h.output_size = meta.rsz_h;
  axis_w.output_size = meta.rsz_w;

  axis_w.min_filter = min_filter;
  axis_h.min_filter = min_filter;

  axis_w.mag_filter = mag_filter;
  axis_h.mag_filter = mag_filter;

  return resampling;
}
}  // namespace detail

DALI_SCHEMA(ResizeAttr)
.AddOptionalArg("image_type",
Expand Down Expand Up @@ -83,7 +94,8 @@ template <>
/**
 * @brief Computes the transform metadata and resampling parameters for the sample
 *        handled by the calling thread.
 *
 * Both results are stored in the slot indexed by `ws.thread_idx()`.
 *
 * @param ws  workspace of the sample being set up
 */
void Resize<CPUBackend>::SetupSharedSampleParams(SampleWorkspace &ws) {
  const int thread_idx = ws.thread_idx();
  per_sample_meta_[thread_idx] = GetTransfomMeta(&ws, spec_);
  // The slot is written exactly once, through the shared free helper. The earlier
  // assignment via the one-argument member overload was overwritten immediately
  // and has been dropped.
  resample_params_[thread_idx] = detail::GetResamplingParams(
      per_sample_meta_[thread_idx], min_filter_, mag_filter_);
}

template <>
Expand Down
3 changes: 2 additions & 1 deletion dali/operators/image/resize/resize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ void Resize<GPUBackend>::SetupSharedSampleParams(DeviceWorkspace &ws) {
DALI_ENFORCE(input_shape.size() == 3, "Expects 3-dimensional image input.");

per_sample_meta_[i] = GetTransformMeta(spec_, input_shape, &ws, i, ResizeInfoNeeded());
resample_params_[i] = GetResamplingParams(per_sample_meta_[i]);
resample_params_[i] = detail::GetResamplingParams(
per_sample_meta_[i], min_filter_, mag_filter_);
}
}

Expand Down
13 changes: 4 additions & 9 deletions dali/operators/image/resize/resize.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
#include "dali/kernels/imgproc/resample/params.h"

namespace dali {
namespace detail {
/**
 * @brief Builds 2D resampling parameters from the resized dimensions in `meta`
 *        and the given min/mag filter descriptors.
 *
 * Only declared here; the definition is not in this header.
 */
kernels::ResamplingParams2D GetResamplingParams(
const TransformMeta &meta, kernels::FilterDesc min_filter, kernels::FilterDesc mag_filter);
} // namespace detail

class ResizeAttr : protected ResizeCropMirrorAttr {
public:
Expand Down Expand Up @@ -61,15 +65,6 @@ class Resize : public Operator<Backend>
void RunImpl(Workspace<Backend> &ws) override;
void SetupSharedSampleParams(Workspace<Backend> &ws) override;

/**
 * @brief Builds 2D resampling parameters for `meta` using this operator's
 *        `min_filter_` and `mag_filter_`.
 *
 * Axis 0 gets `meta.rsz_h` and axis 1 gets `meta.rsz_w` as output size.
 */
kernels::ResamplingParams2D GetResamplingParams(const TransformMeta &meta) const {
  kernels::ResamplingParams2D result;
  const int target_size[2] = {meta.rsz_h, meta.rsz_w};
  for (int axis = 0; axis < 2; ++axis) {
    result[axis].output_size = target_size[axis];
    result[axis].min_filter = min_filter_;
    result[axis].mag_filter = mag_filter_;
  }
  return result;
}

USE_OPERATOR_MEMBERS();
using Operator<Backend>::RunImpl;
bool save_attrs_;
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/image/resize/resize_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class DLL_PUBLIC ResizeBase : public ResamplingFilterAttr {
std::vector<kernels::ResamplingParams2D> resample_params_;
TensorListShape<> out_shape_;

private:
protected:
kernels::KernelManager kmgr_;

struct MiniBatch {
Expand Down
15 changes: 8 additions & 7 deletions dali/operators/image/resize/resize_crop_mirror.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ enum t_idInfo : uint32_t {
t_mirrorVert
};

/**
 * @brief Plain aggregate carrying the per-sample transform parameters.
 *
 * Field order is significant for aggregate initialization and must not change.
 */
struct TransformMeta {
  int H;      // presumably input height -- TODO confirm against GetTransformMeta
  int W;      // presumably input width -- TODO confirm
  int C;      // presumably number of channels -- TODO confirm
  int rsz_h;  // resized height
  int rsz_w;  // resized width
  std::pair<int, int> crop;  // NOTE(review): meaning of first/second not visible here
  int mirror;                // NOTE(review): encoding of the mirror setting not visible here
};


/**
* @brief Stores parameters for resize+crop+mirror
*/
Expand All @@ -61,13 +69,6 @@ class ResizeCropMirrorAttr : protected CropAttr {
}
}

/**
 * @brief Plain aggregate carrying the per-sample transform parameters.
 *
 * Field order is significant for aggregate initialization and must not change.
 */
struct TransformMeta {
  int H;      // presumably input height -- TODO confirm
  int W;      // presumably input width -- TODO confirm
  int C;      // presumably number of channels -- TODO confirm
  int rsz_h;  // resized height
  int rsz_w;  // resized width
  std::pair<int, int> crop;  // NOTE(review): meaning of first/second not visible here
  int mirror;                // NOTE(review): encoding of the mirror setting not visible here
};

protected:
inline const TransformMeta GetTransformMeta(const OpSpec &spec,
const TensorShape<> &input_shape,
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/sequence_reader_op.c

# The video reader operators are compiled only when BUILD_NVDEC is enabled.
if(BUILD_NVDEC)
  list(APPEND DALI_OPERATOR_SRCS
       "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_op.cc"
       "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_resize_op.cc")
endif()

if (BUILD_LMDB)
Expand Down
15 changes: 15 additions & 0 deletions dali/operators/reader/nvdecoder/sequencewrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ struct SequenceWrapper {
LOG_LINE << event_ << " synchronized!" << std::endl;
}

/**
 * @brief Hands this wrapper's sequence buffer to `frames` through `ShareData`,
 *        described as `count` samples that each have the shape `frame_shape()`.
 *
 * The byte size passed along is element size * count * height * width * channels.
 * NOTE(review): `ShareData` presumably aliases the buffer instead of copying it --
 * confirm, since `frames` would then depend on the lifetime of `sequence`.
 *
 * @param frames  tensor list that receives the buffer, shape and type
 */
void share_frames(TensorList<GPUBackend> &frames) {
  void *buffer = sequence.raw_mutable_data();
  auto per_frame_shapes = TensorListShape<>::make_uniform(count, frame_shape());
  const auto num_bytes = sequence.type().size() * count * height * width * channels;

  frames.ShareData(buffer, num_bytes, per_frame_shapes, sequence.type());
}

/**
 * @brief Returns the shape of a single frame as (height, width, channels).
 */
TensorShape<3> frame_shape() const {
  TensorShape<3> single_frame{height, width, channels};
  return single_frame;
}

Tensor<GPUBackend> sequence;
int count;
int height;
Expand Down
14 changes: 1 addition & 13 deletions dali/operators/reader/video_reader_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,7 @@ The video codecs can be contained in most of container file formats. FFmpeg is u
Returns a batch of sequences of `sequence_length` frames of shape [N, F, H, W, C] (N being the batch size and F the
number of frames). Supports only constant frame rate videos.)code")
.NumInput(0)
.OutputFn([](const OpSpec &spec) {
std::string file_root = spec.GetArgument<std::string>("file_root");
std::string file_list = spec.GetArgument<std::string>("file_list");
bool enable_frame_num = spec.GetArgument<bool>("enable_frame_num");
bool enable_timestamps = spec.GetArgument<bool>("enable_timestamps");
int num_outputs = 1;
if (!file_root.empty() || !file_list.empty()) {
num_outputs++;
if (enable_frame_num) num_outputs++;
if (enable_timestamps) num_outputs++;
}
return num_outputs;
})
.OutputFn(detail::VideoReaderOutputFn)
.AddOptionalArg("filenames",
R"code(File names of the video files to load.
This option is mutually exclusive with `file_root` and `file_list`.)code",
Expand Down
141 changes: 90 additions & 51 deletions dali/operators/reader/video_reader_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,23 @@
#include "dali/operators/reader/reader_op.h"
#include "dali/operators/reader/loader/video_loader.h"


namespace dali {
namespace detail {
/**
 * @brief Computes how many outputs a video reader produces for the given spec.
 *
 * There is always one output. When `file_root` or `file_list` is non-empty there is
 * one more, plus one each for `enable_frame_num` and `enable_timestamps` when set.
 *
 * @param spec  operator spec the four arguments are read from
 * @return number of outputs, between 1 and 4
 */
inline int VideoReaderOutputFn(const OpSpec &spec) {
  // All four arguments are read up front, as before, so argument lookup behaves
  // the same regardless of which branch is taken below.
  const std::string root_dir = spec.GetArgument<std::string>("file_root");
  const std::string list_file = spec.GetArgument<std::string>("file_list");
  const bool with_frame_num = spec.GetArgument<bool>("enable_frame_num");
  const bool with_timestamps = spec.GetArgument<bool>("enable_timestamps");

  if (root_dir.empty() && list_file.empty()) {
    return 1;
  }
  return 2 + (with_frame_num ? 1 : 0) + (with_timestamps ? 1 : 0);
}
}  // namespace detail

class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
public:
Expand All @@ -34,7 +50,6 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
enable_timestamps_(spec.GetArgument<bool>("enable_timestamps")),
count_(spec.GetArgument<int>("sequence_length")),
channels_(spec.GetArgument<int>("channels")),
tl_shape_(batch_size_, sequence_dim),
dtype_(spec.GetArgument<DALIDataType>("dtype")) {
DALIImageType image_type(spec.GetArgument<DALIImageType>("image_type"));

Expand All @@ -58,7 +73,6 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
"timestamps can be enabled only when "
"`file_list` or `file_root` argument is passed");

// TODO(spanev): support rescale
// TODO(spanev): Factor out the constructor body to make VideoReader compatible with lazy_init.
try {
loader_ = InitLoader<VideoLoader>(spec, filenames_);
Expand All @@ -83,75 +97,97 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
// Intentionally a no-op: this reader has nothing to set up per batch here.
void SetupSharedSampleParams(DeviceWorkspace &ws) override {}

void RunImpl(DeviceWorkspace &ws) override {
auto& tl_sequence_output = ws.Output<GPUBackend>(0);
TensorList<GPUBackend> *label_output = NULL;
TensorList<GPUBackend> *frame_num_output = NULL;
TensorList<GPUBackend> *timestamp_output = NULL;

/**
 * @brief Sets the element type of `output` according to `dtype_`.
 *
 * `float` is used when `dtype_` is DALI_FLOAT, `uint8` otherwise. Only `output` is
 * touched; the leftover calls on `tl_sequence_output`, which is neither a parameter
 * nor a member of this method, have been removed.
 *
 * @param output  tensor list whose type is set
 */
void SetOutputType(TensorList<GPUBackend> &output) {
  if (dtype_ == DALI_FLOAT) {
    output.set_type(TypeTable::GetTypeInfoFromStatic<float>());
  } else {  // dtype_ == DALI_UINT8
    output.set_type(TypeTable::GetTypeInfoFromStatic<uint8>());
  }
}

/**
 * @brief Resizes `output` so that sample i has the shape of the i-th prefetched sequence.
 *
 * The shape list is built locally for every call. The leftover writes to the
 * `tl_shape_` member were redundant with it and have been removed.
 *
 * @param output  tensor list to resize
 * @param ws      unused here; part of the virtual signature
 */
virtual void SetOutputShape(TensorList<GPUBackend> &output, DeviceWorkspace &ws) {
  TensorListShape<> output_shape(batch_size_, sequence_dim);
  for (int data_idx = 0; data_idx < batch_size_; ++data_idx) {
    output_shape.set_tensor_shape(
        data_idx, GetSample(data_idx).sequence.shape());
  }
  output.Resize(output_shape);
}

tl_sequence_output.Resize(tl_shape_);
tl_sequence_output.SetLayout("FHWC");
/**
 * @brief Prepares the video output: sets its type, then its shape, then the "FHWC" layout.
 *
 * @param output  tensor list to prepare
 * @param ws      workspace forwarded to SetOutputShape
 */
void PrepareVideoOutput(TensorList<GPUBackend> &output, DeviceWorkspace &ws) {
SetOutputType(output);
SetOutputShape(output, ws);
output.SetLayout("FHWC");
}

/**
 * @brief Sets type and shape of the optional outputs and caches pointers to them.
 *
 * Nothing happens unless `enable_label_output_` is set. The label output is output 1;
 * the frame-number and timestamp outputs, each only when enabled, take the following
 * indices in that order. Each output is fetched exactly once, so `output_index`
 * advances by one per output. The leftover statements that used the out-of-scope
 * locals `label_output`, `frame_num_output` and `timestamp_output` incremented the
 * index a second time and have been removed.
 *
 * @param ws  workspace the outputs are taken from
 */
void PrepareAdditionalOutputs(DeviceWorkspace &ws) {
  if (enable_label_output_) {
    int output_index = 1;
    label_output_ = &ws.Output<GPUBackend>(output_index++);
    label_output_->set_type(TypeTable::GetTypeInfoFromStatic<int>());
    label_output_->Resize(label_shape_);
    if (enable_frame_num_) {
      frame_num_output_ = &ws.Output<GPUBackend>(output_index++);
      frame_num_output_->set_type(TypeTable::GetTypeInfoFromStatic<int>());
      frame_num_output_->Resize(frame_num_shape_);
    }

    if (enable_timestamps_) {
      timestamp_output_ = &ws.Output<GPUBackend>(output_index++);
      timestamp_output_->set_type(TypeTable::GetTypeInfoFromStatic<double>());
      timestamp_output_->Resize(timestamp_shape_);
    }
  }
}

for (int data_idx = 0; data_idx < batch_size_; ++data_idx) {
auto* sequence_output = tl_sequence_output.raw_mutable_tensor(data_idx);

auto& prefetched_sequence = GetSample(data_idx);
tl_sequence_output.type().Copy<GPUBackend, GPUBackend>(sequence_output,
prefetched_sequence.sequence.raw_data(),
prefetched_sequence.sequence.size(),
ws.stream());

if (enable_label_output_) {
auto *label = label_output->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(label, &prefetched_sequence.label, sizeof(int),
cudaMemcpyDefault, ws.stream()));
if (enable_frame_num_) {
auto *frame_num = frame_num_output->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(frame_num, &prefetched_sequence.first_frame_idx,
sizeof(int), cudaMemcpyDefault, ws.stream()));
}
if (enable_timestamps_) {
auto *timestamp = timestamp_output->mutable_tensor<double>(data_idx);
timestamp_output->type().Copy<GPUBackend, CPUBackend>(timestamp,
prefetched_sequence.timestamps.data(),
prefetched_sequence.timestamps.size(),
ws.stream());
}
}
/**
 * @brief Copies one prefetched sequence into sample `data_idx` of `video_output`.
 *
 * The copy is issued on `ws.stream()` using the output's type info.
 *
 * @param data_idx          index of the sample within the batch
 * @param video_output      destination tensor list
 * @param prefetched_video  source sequence
 * @param ws                workspace providing the stream
 */
virtual void ProcessSingleVideo(
    int data_idx,
    TensorList<GPUBackend> &video_output,
    SequenceWrapper &prefetched_video,
    DeviceWorkspace &ws) {
  auto &source = prefetched_video.sequence;
  video_output.type().Copy<GPUBackend, GPUBackend>(
      video_output.raw_mutable_tensor(data_idx), source.raw_data(), source.size(), ws.stream());
}

void ProcessAdditionalOutputs(
int data_idx, SequenceWrapper &prefetched_video, cudaStream_t stream) {
if (enable_label_output_) {
auto *label = label_output_->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(
label, &prefetched_video.label, sizeof(int), cudaMemcpyDefault, stream));
if (enable_frame_num_) {
auto *frame_num = frame_num_output_->mutable_tensor<int>(data_idx);
CUDA_CALL(cudaMemcpyAsync(
frame_num, &prefetched_video.first_frame_idx, sizeof(int), cudaMemcpyDefault, stream));
}
if (enable_timestamps_) {
auto *timestamp = timestamp_output_->mutable_tensor<double>(data_idx);
timestamp_output_->type().Copy<GPUBackend, CPUBackend>(
timestamp,
prefetched_video.timestamps.data(),
prefetched_video.timestamps.size(),
stream);
}
}
}

/**
 * @brief Produces one batch: prepares the outputs, then processes every sample.
 *
 * Output 0 is the video output. For each of the `batch_size_` samples the video is
 * handled by ProcessSingleVideo and the optional outputs by ProcessAdditionalOutputs.
 *
 * @param ws  workspace providing the outputs and the stream
 */
void RunImpl(DeviceWorkspace &ws) override {
  auto &frames_out = ws.Output<GPUBackend>(0);

  // Type, shape and layout are set before any sample is processed.
  PrepareVideoOutput(frames_out, ws);
  PrepareAdditionalOutputs(ws);

  for (int sample_idx = 0; sample_idx < batch_size_; ++sample_idx) {
    auto &sample = GetSample(sample_idx);
    ProcessSingleVideo(sample_idx, frames_out, sample, ws);
    ProcessAdditionalOutputs(sample_idx, sample, ws.stream());
  }
}

private:
static constexpr int sequence_dim = 4;
std::vector<std::string> filenames_;
std::string file_root_;
Expand All @@ -161,11 +197,14 @@ class VideoReader : public DataReader<GPUBackend, SequenceWrapper> {
int count_;
int channels_;

TensorListShape<> tl_shape_;
TensorListShape<> label_shape_;
TensorListShape<> timestamp_shape_;
TensorListShape<> frame_num_shape_;

TensorList<GPUBackend> *label_output_ = NULL;
TensorList<GPUBackend> *frame_num_output_ = NULL;
TensorList<GPUBackend> *timestamp_output_ = NULL;

DALIDataType dtype_;
bool enable_label_output_;

Expand Down
41 changes: 41 additions & 0 deletions dali/operators/reader/video_reader_resize_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <vector>

#include "dali/core/common.h"
#include "dali/core/error_handling.h"
#include "dali/pipeline/operator/common.h"
#include "dali/pipeline/operator/op_spec.h"
#include "dali/pipeline/operator/operator.h"
#include "dali/operators/reader/video_reader_op.h"
#include "dali/operators/reader/video_reader_resize_op.h"

namespace dali {

// Registers VideoReaderResize for the GPU backend.
// NOTE(review): the macro arguments are presumably (operator name, implementing class,
// backend) -- confirm against the DALI_REGISTER_OPERATOR definition.
DALI_REGISTER_OPERATOR(VideoReaderResize, VideoReaderResize, GPU);

// Schema of VideoReaderResize: it takes no inputs, its number of outputs is computed by
// detail::VideoReaderOutputFn, and it names VideoReader, ResizeAttr and
// ResamplingFilterAttr as parent schemas (presumably to inherit their arguments -- confirm).
DALI_SCHEMA(VideoReaderResize)
.DocStr(R"code(
Load and decode H264 video codec with FFmpeg and NVDECODE, NVIDIA GPU's hardware-accelerated video decoding.
The video codecs can be contained in most of container file formats. FFmpeg is used to parse video containers.
Returns a batch of sequences of `sequence_length` frames of shape [N, F, H, W, C] (N being the batch size and F the
number of frames). Supports only constant frame rate videos. It resizes video based on provided params. It supports
features of `Resize` operator.)code")
.NumInput(0)
.OutputFn(detail::VideoReaderOutputFn)
.AddParent("VideoReader")
.AddParent("ResizeAttr")
.AddParent("ResamplingFilterAttr");
} // namespace dali
Loading

0 comments on commit ada6f49

Please sign in to comment.