[CI] Update CI docker Image, MLIR and CK Integration - Ubuntu 20.04 (#1790)
ROCm · Oct 1, 2022 · 24f745a · atamazov · Oct 4, 2022 · 24f745a
1 parent 10b8657
commit 24f745a
Show file tree

Hide file tree

Showing 80 changed files with 515 additions and 483 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -226,9 +226,6 @@ if(MIOPEN_BACKEND_OPENCL)
     set(MIOPEN_USE_COMPOSABLEKERNEL OFF)
 endif()
 message(STATUS "Enable Composable Kernels: ${MIOPEN_USE_COMPOSABLEKERNEL}")
-if(MIOPEN_USE_COMPOSABLEKERNEL)
-    find_package(composable_kernel 1.0.0 COMPONENTS device_operations)
-endif()
 
 set_var_to_condition(MIOPEN_USE_COMGR_DEFAULT (NOT DEFINED MIOPEN_BACKEND_OPENCL) AND (NOT (MIOPEN_BACKEND STREQUAL "HIPNOGPU")))
 option(MIOPEN_USE_COMGR "Use comgr to build kernels instead of offline tools" ${MIOPEN_USE_COMGR_DEFAULT})
@@ -251,7 +248,7 @@ add_definitions("-DHIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}")
 # HIP
 if( MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU")
     if(MIOPEN_USE_COMPOSABLEKERNEL)
-    find_package(composable_kernel 1.0.0 COMPONENTS device_operations)
+        find_package(composable_kernel 1.0.0 COMPONENTS device_operations)
     endif()
     if( MIOPEN_BACKEND STREQUAL "HIPNOGPU")
         set(MIOPEN_MODE_NOGPU 1)
@@ -638,18 +635,23 @@ enable_clang_tidy(
         -readability-container-data-pointer
         -readability-identifier-length
         -readability-suspicious-call-argument
-        #CPP-17 and ROCM 5.3 update related warning
-        -misc-confusable-identifiers
+        #TODO Code Quality WORKAROUND ROCm 5.3 && Ubuntu 22.04 && C++17 && cppcheck 2.9 update
+        -bugprone-use-after-move
+        -hicpp-invalid-access-moved
         -modernize-use-nodiscard
-        -modernize-concat-nested-namespaces
+        -misc-confusable-identifiers
         -modernize-unary-static-assert
         -modernize-macro-to-enum
-        -readability-simplify-boolean-expr
+        -modernize-concat-nested-namespaces
         -readability-redundant-declaration
+        -readability-simplify-boolean-expr
+        -bugprone-branch-clone
+        -clang-diagnostic-deprecated
+        -hicpp-deprecated-headers
         -hicpp-member-init
-        -bugprone-use-after-move
-        -hicpp-invalid-access-moved
-
+        -performance-no-automatic-move
+        -clang-analyzer-cplusplus.NewDeleteLeaks
+        -modernize-deprecated-headers
         ${MIOPEN_TIDY_CHECKS}
     ${MIOPEN_TIDY_ERRORS}
     HEADER_FILTER
@@ -694,6 +696,30 @@ enable_cppcheck(
         unreadVariable:*src/composable_kernel/host/*/*
         unreadVariable:*src/composable_kernel/external/*/*
         unmatchedSuppression
+        #TODO Code Quality WORKAROUND ROCm 5.3 && Ubuntu 22.04 && C++17 && cppcheck 2.9 update
+        ctuOneDefinitionRuleViolation:*test/*
+        ctuOneDefinitionRuleViolation:*src/composable_kernel/composable_kernel/*/*
+        ctuPointerArith:*test/*
+        constParameter
+        constVariable
+        variableScope
+        missingReturn
+        cstyleCast
+        unknownMacro
+        uselessCallsSubstr
+        uninitMemberVar
+        overlappingWriteUnion
+        operatorEqVarError
+        returnTempReference
+        objectIndex
+        integerOverflowCond
+        rethrowNoCurrentException
+        mismatchingContainers
+        unreadVariable
+        CastIntegerToAddressAtReturn
+        knownConditionTrueFalse
+        shadowFunction
+        moduloofone
     FORCE
     SOURCES
         addkernels/

diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,5 @@
 FROM ubuntu:20.04 as miopen
+ARG DEBIAN_FRONTEND=noninteractive
 
 ARG USE_MLIR="OFF"
 
@@ -12,57 +13,49 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     ca-certificates \
     curl \
     libnuma-dev \
-    gnupg \
+    gnupg2 \
     wget
 
 #Add gpg keys
+ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 9386B48A1A693C5C && \
     wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
 
+RUN wget https://repo.radeon.com/amdgpu-install/5.3/ubuntu/focal/amdgpu-install_5.3.50300-1_all.deb  --no-check-certificate
+RUN apt-get update && \
+DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
+    ./amdgpu-install_5.3.50300-1_all.deb
+
 # Add rocm repository
 # Note: The ROCm version with $USE_MLIR should keep in sync with default ROCm version
 # unless MLIR library is incompatible with current ROCm.
-RUN export ROCM_APT_VER=.apt_5.2.3;\
+RUN export ROCM_APT_VER=5.3;\
 echo $ROCM_APT_VER &&\
 sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/$ROCM_APT_VER/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
 RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
 
+RUN amdgpu-install -y --usecase=rocm --no-dkms
+
 # Install dependencies
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     build-essential \
     cmake \
-    comgr \
-    clang-format-10 \
+    clang-format-12 \
     doxygen \
-    g++ \
     gdb \
     git \
-    hip-rocclr \
     lcov \
-    libelf-dev \
     libncurses5-dev \
-    libpthread-stubs0-dev \
     llvm-amdgpu \
     miopengemm \
     pkg-config \
-    python \
-    python3 \
-    python-dev \
     python3-dev \
     python3-pip \
-    python3-distutils \
     python3-venv \
-    software-properties-common \
-    rocm-dev \
-    rocm-device-libs \
-    rocm-opencl \
-    rocm-opencl-dev \
     rocblas \
     rpm \
-    zlib1g-dev \
-    kmod && \
-    apt-get remove -y rocm-cmake && \
+    software-properties-common && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -471,7 +471,7 @@ pipeline {
                                 -o -iname \'*.cpp.in\' \
                                 -o -iname \'*.cl\' \
                                 | grep -v -E '(build/)|(install/)' \
-                                | xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-10 -style=file {} | diff - {}\'"
+                                | xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\'"
                     }
                     steps{
                         buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", execute_cmd: execute_cmd, needs_gpu:false)

diff --git a/cmake/EnableCompilerWarnings.cmake b/cmake/EnableCompilerWarnings.cmake
@@ -67,6 +67,7 @@ else()
             -Wunused
             -Wno-ignored-qualifiers
             -Wno-sign-compare
+            -Wno-deprecated
         )
         if (CMAKE_${COMPILER}_COMPILER_ID MATCHES "Clang")
             list(APPEND CMAKE_COMPILER_WARNINGS

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,5 +1,3 @@
 ROCmSoftwarePlatform/rocm-recipes
-RadeonOpenCompute/rocm-cmake@04f694df2a8dc9d7e35fa4dee4ba5fa407ec04f8 --build
 -f requirements.txt
-# 1.90+
-danmar/cppcheck@dd05839a7e63ef04afd34711cb3e1e0ef742882f
+danmar/cppcheck@2.9
diff --git a/doc/requirements.txt b/doc/requirements.txt
@@ -24,8 +24,8 @@
 # 
 ################################################################################
 # Python 3.6+ required
-sphinx==4.1.2
-breathe==4.30.0
+sphinx==5.2.0
+breathe==4.34.0
 docutils<0.17 # sphinx-rtd-theme 0.5.2 requires docutils<0.17
 sphinx_rtd_theme==0.5.2
-myst-parser==0.15.1
+myst-parser==0.18.0
diff --git a/driver/activ_driver.hpp b/driver/activ_driver.hpp
@@ -250,7 +250,7 @@ int ActivationDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
             double v = -alpha / beta;
             in[i]    = i % 2 ? RAN_GEN<Tgpu>(static_cast<Tgpu>((v + 0.005) / beta),
                                           static_cast<Tgpu>((v + 2.0) / beta))
-                          : RAN_GEN<Tgpu>(static_cast<Tgpu>((v - 2.0) / beta),
+                             : RAN_GEN<Tgpu>(static_cast<Tgpu>((v - 2.0) / beta),
                                           static_cast<Tgpu>((v - 0.005) / beta));
             break;
         }

diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp
@@ -2175,7 +2175,7 @@ int ConvDriver<Tgpu, Tref>::RunForwardGPUReference()
     auto ref_solution_id = miopen::deref(convDesc).mode == miopenTranspose
                                ? miopen::solver::Id("ConvDirectNaiveConvBwd").Value()
                                : miopen::solver::Id("ConvDirectNaiveConvFwd").Value();
-    auto rc = miopenConvolutionForwardImmediate(handle,
+    auto rc              = miopenConvolutionForwardImmediate(handle,
                                                 weightTensor,
                                                 wei_dev->GetMem(),
                                                 inputTensor,
@@ -3140,7 +3140,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardDataGPUReference()
     auto ref_solution_id = miopen::deref(convDesc).mode == miopenTranspose
                                ? miopen::solver::Id("ConvDirectNaiveConvFwd").Value()
                                : miopen::solver::Id("ConvDirectNaiveConvBwd").Value();
-    auto rc = miopenConvolutionBackwardDataImmediate(handle,
+    auto rc              = miopenConvolutionBackwardDataImmediate(handle,
                                                      outputTensor,
                                                      dout_dev->GetMem(),
                                                      weightTensor,
@@ -3317,8 +3317,8 @@ int ConvDriver<Tgpu, Tref>::VerifyForward()
 
     const auto isInt8 = (data_type == miopenInt8 || data_type == miopenInt8x4);
     auto error        = is_fwd_run_failed ? std::numeric_limits<double>::max()
-                                   : (isInt8 ? miopen::rms_range(outhost.data, out_int8)
-                                             : miopen::rms_range(outhost.data, out.data));
+                                          : (isInt8 ? miopen::rms_range(outhost.data, out_int8)
+                                                    : miopen::rms_range(outhost.data, out.data));
 
     auto tolerance = GetDefaultTolerance();
     // iGemm's deviation is higher than other algorithms.

diff --git a/driver/main.cpp b/driver/main.cpp
@@ -199,9 +199,9 @@ int main(int argc, char* argv[])
         return rc;
     }
 
-    int fargval = ((base_arg != "CBAInfer") && (base_arg != "CBAInferfp16"))
-                      ? drv->GetInputFlags().GetValueInt("forw")
-                      : 1;
+    int fargval       = ((base_arg != "CBAInfer") && (base_arg != "CBAInferfp16"))
+                            ? drv->GetInputFlags().GetValueInt("forw")
+                            : 1;
     bool bnFwdInVer   = (fargval == 2 && (base_arg == "bnorm"));
     bool verifyarg    = (drv->GetInputFlags().GetValueInt("verify") == 1);
     int cumulative_rc = 0; // Do not stop running tests in case of errors.

diff --git a/driver/reduce_driver.hpp b/driver/reduce_driver.hpp
@@ -313,8 +313,8 @@ int ReduceDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
     miopenGetReductionIndicesSize(
         GetHandle(), reduceDesc, inputTensor, outputTensor, &this->indices_sizeInBytes);
 
-    size_t ws_nelem = (!this->need_indices) ? this->ws_sizeInBytes / sizeof(Tgpu)
-                                            : this->ws_sizeInBytes / (sizeof(Tgpu) + sizeof(int));
+    size_t ws_nelem      = (!this->need_indices) ? this->ws_sizeInBytes / sizeof(Tgpu)
+                                                 : this->ws_sizeInBytes / (sizeof(Tgpu) + sizeof(int));
     size_t indices_nelem = this->indices_sizeInBytes / sizeof(int);
 
 #if MIOPEN_BACKEND_OPENCL
@@ -328,7 +328,7 @@ int ReduceDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
     out_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, out_nelem, sizeof(Tgpu)));
     ws_dev  = this->need_indices ? std::unique_ptr<GPUMem>(new GPUMem(
                                       ctx, ws_nelem * 2, std::max<int>(sizeof(Tgpu), sizeof(int))))
-                                : std::unique_ptr<GPUMem>(new GPUMem(ctx, ws_nelem, sizeof(Tgpu)));
+                                 : std::unique_ptr<GPUMem>(new GPUMem(ctx, ws_nelem, sizeof(Tgpu)));
 
     indices_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, indices_nelem, sizeof(int)));
 
@@ -385,9 +385,9 @@ int ReduceDriver<Tgpu, Tref>::RunForwardGPU()
     const void* const alphaPtr = std::is_same<Tgpu, double>::value
                                      ? static_cast<const void*>(&alpha64)
                                      : static_cast<const void*>(&alpha);
-    const void* const betaPtr = std::is_same<Tgpu, double>::value
-                                    ? static_cast<const void*>(&beta64)
-                                    : static_cast<const void*>(&beta);
+    const void* const betaPtr  = std::is_same<Tgpu, double>::value
+                                     ? static_cast<const void*>(&beta64)
+                                     : static_cast<const void*>(&beta);
 
     miopenReduceTensor(GetHandle(),
                        reduceDesc,

diff --git a/fin/src/include/fin.hpp b/fin/src/include/fin.hpp
@@ -149,8 +149,8 @@ class BaseFin
             {
                 auto p = handle.LoadProgram(kern.kernel_file, kern.comp_options, false, "");
                 hsaco  = p.IsCodeObjectInMemory()
-                            ? p.GetCodeObjectBlob()
-                            : miopen::LoadFile(p.GetCodeObjectPathname().string());
+                             ? p.GetCodeObjectBlob()
+                             : miopen::LoadFile(p.GetCodeObjectPathname().string());
                 if(hsaco.empty())
                 {
                     std::cerr << "Got empty code object" << std::endl;

diff --git a/install_deps.cmake b/install_deps.cmake
@@ -113,5 +113,5 @@ cget(init ${TOOLCHAIN_FLAG} -DCMAKE_INSTALL_RPATH=${PREFIX}/lib ${PARSE_UNPARSED
 cget(ignore pcre)
 
 # Install dependencies
-cget(install -U pfultz2/rocm-recipes)
+cget(install -U ROCmSoftwarePlatform/rocm-recipes)
 cget(install -U -f requirements.txt)
diff --git a/rbuild.ini b/rbuild.ini
@@ -11,7 +11,6 @@ cxx = ${rocm_path}/llvm/bin/clang++
 cc = ${rocm_path}/llvm/bin/clang
 ignore = pcre
 deps =
-    kitware/[email protected]
     -f dev-requirements.txt
 define =
     BUILD_DEV=On
@@ -22,7 +21,4 @@ cc = ${rocm_path}/llvm/bin/clang
 ignore = pcre
 deps =
     ROCmSoftwarePlatform/rocm-recipes
-    kitware/[email protected]
     -f dev-requirements.txt
-    nlohmann/json@350ff4f7ced7c4117eae2fb93df02823c8021fcb
-
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
-RadeonOpenCompute/rocm-cmake@04f694df2a8dc9d7e35fa4dee4ba5fa407ec04f8 --build
 sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.79 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build
 half,https://github.com/pfultz2/half/archive/1.12.0.tar.gz -X header -H sha256:0a08660b68abb176ebc2a0cdf8de46e3182a7f46c66443bb80dbfaaec98cf969 --build

diff --git a/src/activ/problem_description.cpp b/src/activ/problem_description.cpp
@@ -40,15 +40,15 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const
 
     const auto x_elem_sz = xDesc.GetElementSize();
 
-    const auto x_width2D =
-        ((x_lens.size() == 2)
-             ? x_lens[1]
-             : (x_lens.size() == 3) ? x_lens[2] : (x_lens.size() == 4) ? x_lens[3] : x_lens[4]);
-
-    const auto height =
-        (x_lens.size() == 2)
-            ? x_lens[0]
-            : (x_lens.size() == 3) ? x_lens[1] : (x_lens.size() == 4) ? x_lens[2] : x_lens[3];
+    const auto x_width2D = ((x_lens.size() == 2)   ? x_lens[1]
+                            : (x_lens.size() == 3) ? x_lens[2]
+                            : (x_lens.size() == 4) ? x_lens[3]
+                                                   : x_lens[4]);
+
+    const auto height = (x_lens.size() == 2)   ? x_lens[0]
+                        : (x_lens.size() == 3) ? x_lens[1]
+                        : (x_lens.size() == 4) ? x_lens[2]
+                                               : x_lens[3];
 
     const auto packed = xDesc.IsPacked() && yDesc.IsPacked();
 

diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp
@@ -67,8 +67,8 @@ static boost::filesystem::path ComputeUserCachePath()
 
     const char* const custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{});
     const auto p             = (custom != nullptr && strlen(custom) > 0)
-                       ? boost::filesystem::path{miopen::ExpandUser(custom)}
-                       : boost::filesystem::path{miopen::ExpandUser(cache_dir)} / version;
+                                   ? boost::filesystem::path{miopen::ExpandUser(custom)}
+                                   : boost::filesystem::path{miopen::ExpandUser(cache_dir)} / version;
 
     if(!boost::filesystem::exists(p) && !MIOPEN_DISABLE_USERDB)
         boost::filesystem::create_directories(p);

diff --git a/src/composable_kernel/composable_kernel/include/tensor_description/multi_index_transform.hpp b/src/composable_kernel/composable_kernel/include/tensor_description/multi_index_transform.hpp
@@ -1039,13 +1039,13 @@ struct Merge_v2_magic_division
     using UpLengths =
         decltype(make_tuple(container_reduce(LowLengths{}, math::multiplies{}, Number<1>{})));
 
-    using LowLengthsMagicDivisorMultipiler = decltype(
-        generate_tuple(lambda_merge_generate_MagicDivision_calculate_magic_multiplier<LowLengths>{},
-                       Number<NDimLow>{}));
+    using LowLengthsMagicDivisorMultipiler = decltype(generate_tuple(
+        lambda_merge_generate_MagicDivision_calculate_magic_multiplier<LowLengths>{},
+        Number<NDimLow>{}));
 
-    using LowLengthsMagicDivisorShift = decltype(
-        generate_tuple(lambda_merge_generate_MagicDivision_calculate_magic_shift<LowLengths>{},
-                       Number<NDimLow>{}));
+    using LowLengthsMagicDivisorShift = decltype(generate_tuple(
+        lambda_merge_generate_MagicDivision_calculate_magic_shift<LowLengths>{},
+        Number<NDimLow>{}));
 
     LowLengths low_lengths_;
     LowLengthsMagicDivisorMultipiler low_lengths_magic_divisor_multiplier_;
@@ -1198,9 +1198,9 @@ struct Merge_v2r2_magic_division
         lambda_merge_generate_MagicDivision_calculate_magic_multiplier<LowLengthsScan>{},
         Number<NDimLow>{}));
 
-    using LowLengthsScanMagicDivisorShift = decltype(
-        generate_tuple(lambda_merge_generate_MagicDivision_calculate_magic_shift<LowLengthsScan>{},
-                       Number<NDimLow>{}));
+    using LowLengthsScanMagicDivisorShift = decltype(generate_tuple(
+        lambda_merge_generate_MagicDivision_calculate_magic_shift<LowLengthsScan>{},
+        Number<NDimLow>{}));
 
     LowLengths low_lengths_;
     LowLengthsScan low_lengths_scan_;

diff --git a/...sable_kernel/composable_kernel/include/tensor_operation/reduction_functions_blockwise.hpp b/...sable_kernel/composable_kernel/include/tensor_operation/reduction_functions_blockwise.hpp
@@ -61,9 +61,9 @@ struct BlockwiseReduction_2d_block_buffer
         index_t offset;
         for(index_t otherDimInd = 0; otherDimInd < toReduceBlocks; otherDimInd++)
         {
-            offset = blockIsOneRow
-                         ? buffer2dDesc.CalculateOffset(make_tuple(otherDimInd, thread_local_id))
-                         : buffer2dDesc.CalculateOffset(make_tuple(thread_local_id, otherDimInd));
+            offset          = blockIsOneRow
+                                  ? buffer2dDesc.CalculateOffset(make_tuple(otherDimInd, thread_local_id))
+                                  : buffer2dDesc.CalculateOffset(make_tuple(thread_local_id, otherDimInd));
             compType opData = type_convert<compType>{}(block_buffer[offset]);
 
             binop::calculate(lAccuData, opData);