Skip to content

Commit 183f891

Browse files
committed
Update onnx-tensorrt and copyright headers (2021)
Signed-off-by: Rajeev Rao <[email protected]>
1 parent da988fd commit 183f891

File tree

470 files changed

+765
-763
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

470 files changed

+765
-763
lines changed

.gitmodules

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,4 @@
99
[submodule "parsers/onnx"]
1010
path = parsers/onnx
1111
url = https://github.com/onnx/onnx-tensorrt.git
12-
branch = 7.2.1
12+
branch = master

CMakeLists.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -194,4 +194,3 @@ endif()
194194
if(BUILD_SAMPLES)
195195
add_subdirectory(samples)
196196
endif()
197-

CODING-GUIDELINES.md

+10-10
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ if (nbInputs == kNbInputsWBias) {/*...*/}
7676
* Public member variables do not require the 'm' prefix but it is highly encouraged to use the prefix when needed to improve code clarity, especially in cases where the class is a base class in an inheritance chain.
7777

7878
8. Constants
79-
* Enumerations, global constants, static constants at class-scope and function-scope magic-number/literal constants are uppercase snakecase with prefix 'k':
79+
* Enumerations, global constants, static constants at class-scope and function-scope magic-number/literal constants are uppercase snakecase with prefix 'k':
8080
```cpp
8181
const int kDIGIT_NUM = 10;
8282
```
@@ -103,7 +103,7 @@ Notes:
103103

104104
#### Formatting
105105
1. Use the [LLVM clang-format](https://clang.llvm.org/docs/ClangFormat.html) tool for formatting your changes prior to submitting the PR.
106-
2. Use a maximum of 120 characters per line. The auto formatting tool will wrap longer lines.
106+
2. Use a maximum of 120 characters per line. The auto formatting tool will wrap longer lines.
107107
3. Exceptions to formatting violations must be justified on a per-case basis. Bypassing the formatting rules is discouraged, but can be achieved for exceptions as follows:
108108
```cpp
109109
// clang-format off
@@ -133,7 +133,7 @@ doSomeOperation(/* checkForErrors = */ false);
133133
```cpp
134134
//! This is a Doxygen comment
135135
//! in C++ style
136-
136+
137137
struct Foo
138138
{
139139
int x; //!< This is a Doxygen comment for members
@@ -149,7 +149,7 @@ struct Foo
149149
#endif
150150
```
151151

152-
```cpp
152+
```cpp
153153
// Alternative: use a macro which evaluates to a noop in release code.
154154
#if DEBUG_CONVOLUTION_INSTRUMENTATION
155155
# define DEBUG_CONV_CODE(x) x
@@ -235,7 +235,7 @@ switch (x) case 4: if (y) case 5: return 0; else default: return 1;
235235
switch (x)
236236
{
237237
case 0: // Fall-through allowed from case 0: to case 1: since case 0 is empty.
238-
case 1:
238+
case 1:
239239
a();
240240
b();
241241
break;
@@ -250,7 +250,7 @@ case 5:
250250
c();
251251
throw 42; // Terminating with throw is okay
252252
default:
253-
throw 42;
253+
throw 42;
254254
}
255255
```
256256

@@ -297,7 +297,7 @@ case 1:
297297
#### Preprocessor Directives
298298
1. *MISRA C++: 2008 Rule 16-0-2*
299299
`#define` and `#undef` of macros should be done only at global namespace.
300-
2. Avoid the use of `#ifdef` and `#ifndef` directives (except in the case of header include guards). Prefer to use `#if defined(...)` or `#if !defined(...)` instead. The latter syntax is more consistent with C syntax, and allows you to use more complicated preprocessor conditionals, e.g.:
300+
2. Avoid the use of `#ifdef` and `#ifndef` directives (except in the case of header include guards). Prefer to use `#if defined(...)` or `#if !defined(...)` instead. The latter syntax is more consistent with C syntax, and allows you to use more complicated preprocessor conditionals, e.g.:
301301
```cpp
302302
#if defined(FOO) || defined(BAR)
303303
void foo();
@@ -343,14 +343,14 @@ for (size_t i = 0; i < mTensors.size(); ++i) // preferred style
343343
```
344344
* Using only signed integers for the above would lead to prolixity and perhaps unsafe narrowing:
345345
```cpp
346-
for (int i = 0; i < static_cast<int>(mTensors.size()); ++i)
346+
for (int i = 0; i < static_cast<int>(mTensors.size()); ++i)
347347
```
348348

349349

350350
#### Special Considerations for API
351351
1. The API consists, with very few exceptions, of methodless structs and pure virtual interface classes.
352352
2. API class methods should be either virtual or inline.
353-
3. The API does not use integral types with platform-dependent sizes, other than `int`, `unsigned`, and `bool`. `size_t` should be used only for sizes of memory buffers.
353+
3. The API does not use integral types with platform-dependent sizes, other than `int`, `unsigned`, and `bool`. `size_t` should be used only for sizes of memory buffers.
354354
4. The API does not use any aggregate types (e.g. `std::string`) which may be compiled differently with different compilers and libraries.
355355
5. The API minimizes dependencies on system headers - currently only `<cstddef>` and `<cstdint>`.
356356
6. Memory ownership may not be transferred across API boundaries - any memory allocated inside a library must be freed inside the library.
@@ -419,7 +419,7 @@ char const * const errStr = getErrorStr(status);
419419
1. All TensorRT Open Source Software code should contain an NVIDIA copyright header that includes the current year. The following block of text should be prepended to the top of all OSS files. This includes .cpp, .h, .cu, .py, and any other source files which are compiled or interpreted.
420420
```cpp
421421
/*
422-
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
422+
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
423423
*
424424
* Licensed under the Apache License, Version 2.0 (the "License");
425425
* you may not use this file except in compliance with the License.

cmake/modules/find_library_create_target.cmake

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

cmake/modules/set_ifndef.cmake

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

cmake/toolchains/cmake_aarch64-android.toolchain

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ set(CMAKE_CUDA_COMPILER_FORCED TRUE)
4040

4141
set(CUDA_LIBS -L${CUDA_ROOT}/lib64)
4242

43-
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${CUDA_LIBS} -lcublas -lcudart -lnvToolsExt -lculibos -lcudadevrt -llog)
43+
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${CUDA_LIBS} -lcublas -lcudart -lnvToolsExt -lculibos -lcudadevrt -llog)
4444

4545

4646
set(DISABLE_SWIG TRUE)

cmake/toolchains/cmake_aarch64.toolchain

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

cmake/toolchains/cmake_ppc64le.toolchain

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

cmake/toolchains/cmake_qnx.toolchain

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

cmake/toolchains/cmake_x64_win.toolchain

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ set(W10_LINKER ${MSVC_COMPILER_DIR}/bin/amd64/link)
3838

3939

4040
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_NVCC_COMPILER} CACHE STRING "" FORCE)
41-
41+
4242
set(ADDITIONAL_PLATFORM_INCL_FLAGS "-I${MSVC_COMPILER_DIR}/include -I${MSVC_COMPILER_DIR}/../ucrt/include")
4343
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${ADDITIONAL_PLATFORM_LIB_FLAGS} "-LIBPATH:${NV_TOOLS}/ddk/wddmv2/official/17134/Lib/10.0.17134.0/um/x64")
4444
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${ADDITIONAL_PLATFORM_LIB_FLAGS} "-LIBPATH:${MSVC_COMPILER_DIR}/lib/amd64" )

cmake/toolchains/cmake_x86_64.toolchain

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

demo/BERT/CMakeLists.txt

+3-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ link_directories(
4949
${TRT_LIB_DIR}
5050
)
5151

52-
pybind11_add_module(infer_c
52+
pybind11_add_module(infer_c
5353
infer_c/infer_c.cpp
5454
infer_c/logging.cpp
5555
)
@@ -64,9 +64,8 @@ add_executable(perf
6464
infer_c/logging.cpp
6565
)
6666

67-
target_link_libraries(perf
67+
target_link_libraries(perf
6868
${CUDA_LIBRARIES}
6969
nvinfer
7070
nvinfer_plugin
7171
)
72-

demo/BERT/builder.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
#
3-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -229,7 +229,7 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas
229229
hidden_size = idims[2]
230230

231231
if config.use_qat:
232-
dr_input = init_dict[prefix + 'attention_self_query_input_amax']
232+
dr_input = init_dict[prefix + 'attention_self_query_input_amax']
233233
assert(dr_input ==init_dict[prefix + 'attention_self_key_input_amax'] )
234234
assert(dr_input ==init_dict[prefix + 'attention_self_value_input_amax'] )
235235
input_tensor.set_dynamic_range(-dr_input, dr_input)
@@ -293,7 +293,7 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas
293293
dr_gelu = init_dict[prefix + 'output_dense_input_amax']
294294
set_output_range(gelu_layer, dr_gelu)
295295
else:
296-
# use gelu10 according to whitepaper http://arxiv.org/abs/2004.09602
296+
# use gelu10 according to whitepaper http://arxiv.org/abs/2004.09602
297297
set_output_range(gelu_layer, 10)
298298

299299
# FC2
@@ -445,7 +445,7 @@ def onnx_to_trt_name(onnx_name):
445445
toks = [t.strip('_') for t in onnx_name.split('.')]
446446
if toks[0] == 'bert': #embeddings or encoder
447447
if toks[1] == 'encoder': #transformer
448-
448+
449449
if toks[-2] == 'layernorm': #bias->beta, weight->gamma
450450
toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma'
451451
elif (toks[-2] == 'dense' or toks[-2] in {'key', 'value', 'query'}) and toks[-1] == 'weight':
@@ -455,7 +455,7 @@ def onnx_to_trt_name(onnx_name):
455455
toks[-2] = 'kernel'
456456
elif toks[-2] == 'input_quantizer':
457457
toks[-2] = 'input'
458-
458+
459459
if 'final_input_quantizer' not in toks[2]:
460460
toks = toks[3:]
461461
toks[0] = 'l{}'.format(int(toks[0]))
@@ -503,10 +503,10 @@ def load_onnx_weights_and_quant(path, config):
503503
Bqkv[0,:] = tensor
504504
Bqkv[1,:] = tensor_dict[prefix + BK]
505505
Bqkv[2,:] = tensor_dict[prefix + BV]
506-
506+
507507
Wqkv = np.ascontiguousarray(Wqkv.reshape((3, N, H, N, H)).transpose((1,0,2,3,4)))
508508
Bqkv = np.ascontiguousarray(Bqkv.reshape((3, N, H)).transpose((1,0,2)))
509-
509+
510510
weights_dict[prefix + WQKV] = trt.Weights(Wqkv)
511511
weights_dict[prefix + BQKV] = trt.Weights(Bqkv)
512512
weights_dict[prefix + WQKV + "_notrans"] = trt.Weights(Wqkv.T)

demo/BERT/builder_varseqlen.py

+14-14
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
#
3-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -165,9 +165,9 @@ def attention_layer_opt(prefix, config, init_dict, network, input_tensor, mask_i
165165
qkv2ctx_plug = mha_plg_creator3.create_plugin("qkv2ctx", pfc)
166166
qkv_in = [mult_all.get_output(0), cu_seqlens, max_seqlen]
167167
else:
168-
fields.append(pf_has_mask)
169-
fields.append(pf_type)
170-
fields.append(pf_var_seqlen)
168+
fields.append(pf_has_mask)
169+
fields.append(pf_type)
170+
fields.append(pf_var_seqlen)
171171
pfc = trt.PluginFieldCollection(fields)
172172
qkv2ctx_plug = mha_plg_creator2.create_plugin("qkv2ctx", pfc)
173173
qkv_in = [mult_all.get_output(0), mask_idx, cu_seqlens, max_seqlen]
@@ -212,7 +212,7 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, mask
212212
hidden_size = config.hidden_size
213213

214214
if config.use_qat:
215-
dr_input = init_dict[prefix + 'attention_self_query_input_amax']
215+
dr_input = init_dict[prefix + 'attention_self_query_input_amax']
216216
assert(dr_input ==init_dict[prefix + 'attention_self_key_input_amax'] )
217217
assert(dr_input ==init_dict[prefix + 'attention_self_value_input_amax'] )
218218
input_tensor.set_dynamic_range(-dr_input, dr_input)
@@ -270,7 +270,7 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, mask
270270
dr_gelu = init_dict[prefix + 'output_dense_input_amax']
271271
set_output_range(gelu_layer, dr_gelu)
272272
else:
273-
# use gelu10 according to whitepaper http://arxiv.org/abs/2004.09602
273+
# use gelu10 according to whitepaper http://arxiv.org/abs/2004.09602
274274
set_output_range(gelu_layer, 10)
275275

276276
# FC2
@@ -423,7 +423,7 @@ def onnx_to_trt_name(onnx_name):
423423
toks = [t.strip('_') for t in onnx_name.split('.')]
424424
if toks[0] == 'bert': #embeddings or encoder
425425
if toks[1] == 'encoder': #transformer
426-
426+
427427
if toks[-2] == 'layernorm': #bias->beta, weight->gamma
428428
toks[-1] = 'beta' if toks[-1] == 'bias' else 'gamma'
429429
elif (toks[-2] == 'dense' or toks[-2] in {'key', 'value', 'query'}) and toks[-1] == 'weight':
@@ -433,7 +433,7 @@ def onnx_to_trt_name(onnx_name):
433433
toks[-2] = 'kernel'
434434
elif toks[-2] == 'input_quantizer':
435435
toks[-2] = 'input'
436-
436+
437437
if 'final_input_quantizer' not in toks[2]:
438438
toks = toks[3:]
439439
toks[0] = 'l{}'.format(int(toks[0]))
@@ -481,14 +481,14 @@ def load_onnx_weights_and_quant(path, config):
481481
Bqkv[0,:] = tensor
482482
Bqkv[1,:] = tensor_dict[prefix + BK]
483483
Bqkv[2,:] = tensor_dict[prefix + BV]
484-
484+
485485
if config.use_int8 and config.interleaved:
486486
Wqkv = np.ascontiguousarray(Wqkv.reshape((3, N, H, N, H)))
487487
Bqkv = np.ascontiguousarray(Bqkv.reshape((3, N, H)))
488488
else:
489489
Wqkv = np.ascontiguousarray(Wqkv.reshape((3, N, H, N, H)).transpose((1,0,2,3,4)))
490490
Bqkv = np.ascontiguousarray(Bqkv.reshape((3, N, H)).transpose((1,0,2)))
491-
491+
492492
weights_dict[prefix + WQKV] = trt.Weights(Wqkv)
493493
weights_dict[prefix + BQKV] = trt.Weights(Bqkv)
494494
weights_dict[prefix + WQKV + "_notrans"] = trt.Weights(Wqkv.T)
@@ -513,7 +513,7 @@ def emb_layernorm(builder, network, config, weights_dict, builder_config, max_se
513513
cu_seqlens = network.add_input(name="cu_seqlens", dtype=trt.int32, shape=(-1,))
514514
max_seqlen = network.add_input(name="max_seqlen", dtype=trt.int32, shape=(-1,))
515515

516-
# Specify profiles
516+
# Specify profiles
517517
profile = builder.create_optimization_profile()
518518
min_shape = (1,)
519519
shape = (max_sequence_length*max_batch_size,)
@@ -538,7 +538,7 @@ def emb_layernorm(builder, network, config, weights_dict, builder_config, max_se
538538
emb_layer = network.add_plugin_v2(inputs, fn)
539539

540540
if config.use_int8 and config.use_qat:
541-
dr_input = weights_dict['l0_attention_self_query_input_amax']
541+
dr_input = weights_dict['l0_attention_self_query_input_amax']
542542
set_output_range(emb_layer, dr_input)
543543
set_output_name(emb_layer, "embeddings_", "output")
544544
return emb_layer, cu_seqlens, max_seqlen
@@ -559,9 +559,9 @@ def build_engine(batch_size, workspace_size, sequence_length, config, weights_di
559559
emb_layer, cu_seqlens, max_seqlen = emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_length, batch_size)
560560
embeddings = emb_layer.get_output(0)
561561
if config.use_int8 and config.interleaved:
562-
shuffle = network.add_shuffle(embeddings)
562+
shuffle = network.add_shuffle(embeddings)
563563
shuffle.second_transpose = (2, 1, 0, 3)
564-
embeddings = shuffle.get_output(0)
564+
embeddings = shuffle.get_output(0)
565565
mask_idx = None
566566
else:
567567
mask_idx = emb_layer.get_output(1)

demo/BERT/helpers/calibrator.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
#
3-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3+
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -109,4 +109,3 @@ def read_histogram_cache(self, length):
109109

110110
def write_histogram_cache(self, ptr, length):
111111
return None
112-

0 commit comments

Comments (0)