Fuzzing targets (NVIDIA#2219)

* Fuzzing targets Signed-off-by: Albert Wolant <[email protected]>
zhenlin-work · Sep 2, 2020 · 7186005 · 7186005
1 parent ce2758c
commit 7186005
Show file tree

Hide file tree

Showing 9 changed files with 382 additions and 2 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -36,6 +36,7 @@ check_cxx_compiler_flag(-fopenmp-simd CXX_HAVE_OMP_SIMD)
 # Build options
 option(BUILD_TEST "Build googletest test suite" ON)
 option(BUILD_BENCHMARK "Build benchmark suite" ON)
+option(BUILD_FUZZING "Build fuzzing suite" OFF)
 # if BUILD_NVTX is empty remove it and let is be default
 if ("${BUILD_NVTX}" STREQUAL "")
   unset(BUILD_NVTX CACHE)

diff --git a/DALI_EXTRA_VERSION b/DALI_EXTRA_VERSION
@@ -1 +1 @@
-1f8ae95b793b96e94e7339a494b80ec3e1757930
+1bf411be92e65b0866a5b4a1915929c9a8c220d0
diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml
@@ -27,6 +27,7 @@ build:
    - CMAKE_BUILD_TYPE
    - BUILD_TEST
    - BUILD_BENCHMARK
+   - BUILD_FUZZING
    - BUILD_NVTX
    - BUILD_PYTHON
    - BUILD_LMDB

diff --git a/dali/CMakeLists.txt b/dali/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -89,6 +89,14 @@ if (BUILD_BENCHMARK)
   add_subdirectory(benchmark)
 endif()
 
+
+################################################
+# Build fuzzing suite
+################################################
+if (BUILD_FUZZING)
+  add_subdirectory(fuzzing)
+endif()
+
 ################################################
 # Build the DALI python bindings
 ################################################

diff --git a/dali/fuzzing/CMakeLists.txt b/dali/fuzzing/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+collect_headers(DALI_INST_HDRS PARENT_SCOPE)
+
+# Sources compiled into every fuzzing target.
+set(DALI_FUZZING_SRCS
+  "${PROJECT_SOURCE_DIR}/dali/test/dali_test_config.cc"
+)
+
+# Defines one fuzzing executable.
+#   TARGET_NAME - name of the CMake target
+#   BINARY_NAME - file name of the produced binary
+#   TARGET_SRC  - source file of the target, relative to this directory
+# The binary is built as a position independent executable and placed in the
+# "test" subdirectory of the wheel directory inside the build tree.
+function(DALI_ADD_FUZZING_TARGET TARGET_NAME BINARY_NAME TARGET_SRC)
+  add_executable(${TARGET_NAME}
+    "${DALI_FUZZING_SRCS}"
+    "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_SRC}")
+  target_link_libraries(${TARGET_NAME} PRIVATE dali dali_operators ${DALI_LIBS} "-pie")
+  set_target_properties(${TARGET_NAME} PROPERTIES
+    POSITION_INDEPENDENT_CODE ON
+    OUTPUT_NAME "${BINARY_NAME}"
+    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${DALI_WHEEL_DIR}/test")
+endfunction()
+
+# Image decoder fuzzing target
+dali_add_fuzzing_target(
+  dali_decoder_fuzzing
+  dali_image_decoder_fuzzing_target.bin
+  image_decoder_target.cc)
+
+# ResNet pipeline fuzzing target
+dali_add_fuzzing_target(
+  dali_rn50_fuzzing
+  dali_rn50_fuzzing_target.bin
+  rn50_target.cc)
diff --git a/dali/fuzzing/README.md b/dali/fuzzing/README.md
@@ -0,0 +1,82 @@
+# DALI Fuzzing Instructions
+
+This document explains how to run fuzzing on DALI.
+
+The goal of fuzzing is to find bugs in software. A fuzzer runs a given binary many times with different inputs, looking for possible problems. It generates the inputs as it goes, based on feedback from the tested binary. This feedback includes execution paths, previously seen errors, etc. This gives better results than a random search, as the space of possible inputs may be huge.
+
+To run fuzzing, we use [American Fuzzy Lop](https://github.com/google/AFL) (AFL).
+
+
+## Setup AFL
+First, we need to set up AFL. The script below shows how to do it on a clean Ubuntu 18 installation.
+```
+sudo apt-get install clang-6.0 build-essential llvm-6.0-dev gnuplot-nox
+
+sudo update-alternatives --install /usr/bin/clang clang `which clang-6.0` 1
+sudo update-alternatives --install /usr/bin/clang++ clang++ `which clang++-6.0` 1
+sudo update-alternatives --install /usr/bin/llvm-config llvm-config `which llvm-config-6.0` 1
+sudo update-alternatives --install /usr/bin/llvm-symbolizer llvm-symbolizer `which llvm-symbolizer-6.0` 1
+
+echo core | sudo tee /proc/sys/kernel/core_pattern
+
+wget http://lcamtuf.coredump.cx/afl/releases/afl-latest.tgz
+tar xvf afl-latest.tgz
+cd afl-2.52b   # replace with whatever the current version is
+make && make -C llvm_mode CXX=g++
+make install
+
+```
+
+
+## Build DALI fuzzing targets
+
+We need to build DALI with the AFL compiler extensions so that AFL can trace the binary we want to fuzz later.
+```
+cmake -DCMAKE_CXX_COMPILER=afl-clang-fast++ -DCMAKE_C_COMPILER=afl-clang-fast -DCUDA_TARGET_ARCHS=61 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_FUZZING=ON -DBUILD_PYTHON=OFF -DBUILD_LMDB=OFF -DBUILD_NVOF=OFF ..
+```
+
+## Run fuzzing
+
+Now we are ready to run fuzzing. The fuzzer takes a binary and runs it multiple times with different inputs.
+
+With the *-i* parameter, we point AFL to a directory with example inputs used to start the fuzzing process. Important: these examples should not trigger errors or exceptions.
+
+*-m none* lifts the memory limit from the tested binary.
+
+*-o* points to the directory where AFL writes its results, including the inputs that caused errors. This makes it possible to reproduce them later.
+
+At the end of the call, we pass the path to the binary to be tested. We use *@@* to mark the place in the call where AFL should put the path to the generated input.
+
+```
+afl-fuzz -i /DALI_extra/db/fuzzing/bmp/ -m none -o fuzz_results ./build/dali/python/nvidia/dali/test/dali_rn50_fuzzing_target.bin @@
+```
+
+When run properly, after some setup, the output should look similar to:
+
+```
+            american fuzzy lop 2.52b (dali_rn50_fuzzing_target.bin)
+
+┌─ process timing ─────────────────────────────────────┬─ overall results ─────┐
+│        run time : 0 days, 0 hrs, 1 min, 9 sec        │  cycles done : 0      │
+│   last new path : none seen yet                      │  total paths : 4      │
+│ last uniq crash : none seen yet                      │ uniq crashes : 0      │
+│  last uniq hang : none seen yet                      │   uniq hangs : 0      │
+├─ cycle progress ────────────────────┬─ map coverage ─┴───────────────────────┤
+│  now processing : 2 (50.00%)        │    map density : 10.86% / 10.86%       │
+│ paths timed out : 1 (25.00%)        │ count coverage : 1.00 bits/tuple       │
+├─ stage progress ────────────────────┼─ findings in depth ────────────────────┤
+│  now trying : trim 16/16            │ favored paths : 2 (50.00%)             │
+│ stage execs : 25/47 (53.19%)        │  new edges on : 4 (100.00%)            │
+│ total execs : 123                   │ total crashes : 0 (0 unique)           │
+│  exec speed : 0.00/sec (zzzz...)    │  total tmouts : 0 (0 unique)           │
+├─ fuzzing strategy yields ───────────┴───────────────┬─ path geometry ────────┤
+│   bit flips : 0/0, 0/0, 0/0                         │    levels : 1          │
+│  byte flips : 0/0, 0/0, 0/0                         │   pending : 4          │
+│ arithmetics : 0/0, 0/0, 0/0                         │  pend fav : 2          │
+│  known ints : 0/0, 0/0, 0/0                         │ own finds : 0          │
+│  dictionary : 0/0, 0/0, 0/0                         │  imported : n/a        │
+│       havoc : 0/0, 0/0                              │ stability : 99.80%     │
+│        trim : 0.00%/60, n/a                         ├────────────────────────┘
+└─────────────────────────────────────────────────────┘          [cpu000: 14%]
+
+```
diff --git a/dali/fuzzing/dali_harness.h b/dali/fuzzing/dali_harness.h
@@ -0,0 +1,175 @@
+// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DALI_FUZZING_DALI_HARNESS_H_
+#define DALI_FUZZING_DALI_HARNESS_H_
+
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>

+#include "dali/core/common.h"
+#include "dali/pipeline/pipeline.h"
+#include "dali/util/image.h"
+#include "dali/test/dali_test_config.h"
+
+namespace dali {
+/**
+ * @brief Base class of the fuzzing harnesses: feeds a batch of encoded files to a DALI pipeline.
+ *
+ * The batch consists of the files with the requested extension listed from
+ * `<dali_extra_path>/db/fuzzing/`, followed by the single file given by `path`.
+ * Derived classes define the pipeline under test by overriding SetupPipeline().
+ */
+class FileListHarness {
+ public:
+  /**
+   * @brief Lists the seed files, appends `path`, loads everything and prepares the input batch.
+   *
+   * @param path            file appended at the end of the batch
+   * @param file_extension  extension used to select files from the fuzzing folder
+   * @param device_id       device id passed to the pipeline created in Run()
+   */
+  FileListHarness(
+    const string &path,
+    string file_extension = ".jpg",
+    int device_id = 0) :
+      device_id_(device_id) {
+    const string seed_folder = make_string(
+      testing::dali_extra_path(),
+      "/db/fuzzing/");
+    image_names_ = ImageList(seed_folder, {file_extension});
+    image_names_.push_back(path);
+    batch_size_ = static_cast<int>(image_names_.size());
+    LoadImages(image_names_, &images_);
+    MakeBatch();
+  }

+  /**
+   * @brief Copies the loaded files into `input_data_`, one 1D uint8 tensor per file,
+   *        and tags every sample with its file name and index as source info.
+   */
+  void MakeBatch() {
+    TensorListShape<> shape(batch_size_, 1);
+    for (int i = 0; i < batch_size_; ++i) {
+      shape.set_tensor_shape(i, {images_.sizes_[i]});
+    }

+    // NOTE(review): presumably called to set the element type to uint8 before Resize - confirm
+    input_data_.template mutable_data<uint8>();
+    input_data_.Resize(shape);

+    for (int i = 0; i < batch_size_; ++i) {
+      // An input file may be empty; skip the copy rather than calling memcpy
+      // with a zero size and possibly null pointers.
+      if (images_.sizes_[i] > 0) {
+        std::memcpy(
+          input_data_.template mutable_tensor<uint8>(i),
+          images_.data_[i],
+          images_.sizes_[i]);
+      }
+      input_data_.SetSourceInfo(i, image_names_[i] + "_" + std::to_string(i));
+    }
+  }

+  /**
+   * @brief Adds the inputs and operators of the tested pipeline and builds it.
+   *
+   * Called by Run() on a freshly constructed pipeline.
+   */
+  virtual void SetupPipeline(Pipeline &pipeline) = 0;

+  /**
+   * @brief Creates a pipeline, lets SetupPipeline() configure it and runs a single iteration.
+   */
+  void Run() {
+    constexpr int kNumThreads = 4;
+    Pipeline pipeline(batch_size_, kNumThreads, device_id_);
+    SetupPipeline(pipeline);

+    DeviceWorkspace ws;
+    pipeline.RunCPU();
+    pipeline.RunGPU();
+    pipeline.Outputs(&ws);
+  }

+  virtual ~FileListHarness() = default;

+ protected:
+  int batch_size_ = 0;                  ///< number of files in the batch
+  int device_id_;                       ///< device id passed to the pipeline
+  TensorList<CPUBackend> input_data_;   ///< encoded file contents, filled by MakeBatch()
+  vector<string> image_names_;          ///< paths of all files in the batch
+  ImgSetDescr images_;                  ///< raw contents of the files, filled by LoadImages()
+};
+
+/**
+ * @brief Harness with a single "mixed" ImageDecoder fed from an external input.
+ *
+ * Seed files are selected with the ".jpg" extension.
+ */
+class DecoderHarness : public FileListHarness {
+ public:
+  explicit DecoderHarness(string &path, int device_id = 0)
+      : FileListHarness(path, ".jpg", device_id) {}

+  /// Feeds the prepared batch as "raw_images" and decodes it to RGB, returning "images" on GPU.
+  void SetupPipeline(Pipeline &pipeline) override {
+    const string input_name = "raw_images";
+    pipeline.AddExternalInput(input_name);
+    pipeline.SetExternalInput(input_name, input_data_);

+    OpSpec decoder_spec("ImageDecoder");
+    decoder_spec
+      .AddArg("device", "mixed")
+      .AddArg("output_type", DALI_RGB)
+      .AddInput(input_name, "cpu")
+      .AddOutput("images", "gpu");
+    pipeline.AddOperator(decoder_spec);

+    pipeline.Build({{"images", "gpu"}});
+  }
+};
+
+/**
+ * @brief Harness with a CPU pipeline: image decoding, random resize + crop + mirror
+ *        and normalization.
+ *
+ * Seed files are selected with the ".bmp" extension.
+ */
+class ResNetHarness : public FileListHarness {
+ public:
+  explicit ResNetHarness(string &path, int device_id = 0)
+      : FileListHarness(path, ".bmp", device_id) {}

+  /// Adds the external input and all operators, then builds the pipeline.
+  void SetupPipeline(Pipeline &pipeline) override {
+    pipeline.AddExternalInput("raw_images");
+    pipeline.SetExternalInput("raw_images", input_data_);

+    // Decode the raw files on the CPU
+    pipeline.AddOperator(
+      OpSpec("ImageDecoder")
+        .AddArg("device", "cpu")
+        .AddArg("output_type", DALI_RGB)
+        .AddInput("raw_images", "cpu")
+        .AddOutput("images", "cpu"));

+    // Uniform RNGs: crop position (x, y) and the size of the shorter side after resize
+    AddUniform(pipeline, "uniform1", 0.0f, 1.0f);
+    AddUniform(pipeline, "uniform2", 0.0f, 1.0f);
+    AddUniform(pipeline, "resize", 256.0f, 480.0f);

+    // Coin flip RNG for the mirror mask
+    pipeline.AddOperator(
+        OpSpec("CoinFlip")
+        .AddArg("device", "cpu")
+        .AddArg("probability", 0.5f)
+        .AddOutput("mirror", "cpu"));

+    // Resize + crop + mirror, driven by the random arguments above
+    pipeline.AddOperator(
+        OpSpec("FastResizeCropMirror")
+        .AddArg("device", "cpu")
+        .AddArg("crop", vector<float>{224, 224})
+        .AddInput("images", "cpu")
+        .AddArgumentInput("mirror", "mirror")
+        .AddArgumentInput("crop_pos_x", "uniform1")
+        .AddArgumentInput("crop_pos_y", "uniform2")
+        .AddArgumentInput("resize_shorter", "resize")
+        .AddOutput("resized", "cpu"));

+    // Normalize and convert to float16
+    pipeline.AddOperator(
+        OpSpec("CropMirrorNormalize")
+        .AddArg("device", "cpu")
+        .AddArg("dtype", DALI_FLOAT16)
+        .AddArg("mean", vector<float>{128, 128, 128})
+        .AddArg("std", vector<float>{1, 1, 1})
+        .AddInput("resized", "cpu")
+        .AddOutput("final_batch", "cpu"));

+    // Build the pipeline; running it is done by the caller
+    pipeline.Build({{"final_batch", "gpu"}});
+  }

+ private:
+  /// Adds a CPU "Uniform" operator with the `range` argument set to {lo, hi},
+  /// producing the output `output_name`.
+  static void AddUniform(Pipeline &pipeline, const string &output_name, float lo, float hi) {
+    pipeline.AddOperator(
+        OpSpec("Uniform")
+        .AddArg("device", "cpu")
+        .AddArg("range", vector<float>{lo, hi})
+        .AddOutput(output_name, "cpu"));
+  }
+};
+
+}  // namespace dali
+
+#endif  // DALI_FUZZING_DALI_HARNESS_H_
diff --git a/dali/fuzzing/image_decoder_target.cc b/dali/fuzzing/image_decoder_target.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "dali/operators.h"
+#include "dali/pipeline/init.h"
+#include "dali/pipeline/operator/op_spec.h"
+#include "dali/fuzzing/dali_harness.h"
+
+
+/**
+ * Entry point of the image decoder fuzzing target.
+ *
+ * Expects the path of the input file as the first command line argument,
+ * initializes DALI and runs the DecoderHarness once with that file.
+ * Returns 0 after a completed run and 1 when the argument is missing.
+ */
+int main(int argc, char *argv[]) {
+  // The binary is meant to be launched by the fuzzer, so the content of the argument
+  // is not validated. Its presence is checked, though: constructing a std::string
+  // from a missing (null) argv[1] would be undefined behavior.
+  if (argc < 2) {
+    return 1;
+  }
+  std::string path(argv[1]);

+  // Init DALI
+  dali::InitOperatorsLib();
+  dali::DALIInit(
+    dali::OpSpec("CPUAllocator"),
+    dali::OpSpec("PinnedCPUAllocator"),
+    dali::OpSpec("GPUAllocator"));

+  // Run test
+  dali::DecoderHarness harness{path};
+  harness.Run();

+  return 0;
+}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		1f8ae95b793b96e94e7339a494b80ec3e1757930
		1bf411be92e65b0866a5b4a1915929c9a8c220d0