[ROCm] bazel build system and continuous integration logic
The commit contains the following components to support TensorFlow on the ROCm platform:

- bazel build system
- continuous integration logic

Authors:

- Jack Chung: [email protected]
- Jeffrey Poznanovic: [email protected]
- Peng Sun: [email protected]
whchung authored and deven-amd committed Sep 5, 2018
1 parent 47b1af2 commit 69d3b8f
Showing 33 changed files with 1,611 additions and 23 deletions.
20 changes: 20 additions & 0 deletions configure.py
@@ -1521,6 +1521,13 @@ def main():
else:
set_trisycl_include_dir(environ_cp)

set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
if (environ_cp.get('TF_NEED_ROCM') == '1' and
'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
'LD_LIBRARY_PATH') != '1'):
write_action_env_to_bazelrc('LD_LIBRARY_PATH',
environ_cp.get('LD_LIBRARY_PATH'))

set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):
@@ -1561,6 +1568,19 @@ def main():
write_to_bazelrc('build --config=download_clang')
write_to_bazelrc('test --config=download_clang')

# SYCL / ROCm / CUDA are mutually exclusive.
# At most 1 GPU platform can be configured.
gpu_platform_count = 0
if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
gpu_platform_count += 1
if environ_cp.get('TF_NEED_ROCM') == '1':
gpu_platform_count += 1
if environ_cp.get('TF_NEED_CUDA') == '1':
gpu_platform_count += 1
if gpu_platform_count >= 2:
raise UserInputError('SYCL / CUDA / ROCm are mutually exclusive. '
'At most 1 GPU platform can be configured.')

set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
if environ_cp.get('TF_NEED_MPI') == '1':
set_mpi_home(environ_cp)
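For context, the ROCm branch above relies on two configure.py helpers whose output ends up in the generated bazelrc fragment. The following is a simplified sketch of what they are assumed to do (the real implementations in configure.py handle more cases); the file name .tf_configure.bazelrc is the conventional target, not something this hunk shows:

    # Simplified approximations of the configure.py helpers used above.
    def write_to_bazelrc(line, bazelrc_path=".tf_configure.bazelrc"):
        # Append one line to the generated bazelrc fragment.
        with open(bazelrc_path, "a") as f:
            f.write(line + "\n")

    def write_action_env_to_bazelrc(var_name, var_value):
        # Emits e.g.: build --action_env LD_LIBRARY_PATH="/opt/rocm/lib"
        write_to_bazelrc('build --action_env %s="%s"' % (var_name, var_value))

With TF_NEED_ROCM=1, the effect is that the caller's LD_LIBRARY_PATH is propagated into Bazel actions so that ROCm libraries resolve at build and test time.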
4 changes: 2 additions & 2 deletions tensorflow/core/BUILD
@@ -146,7 +146,7 @@ load(
"if_static",
"tf_cuda_tests_tags",
)
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
load(
"//third_party/mkl:build_defs.bzl",
@@ -2941,7 +2941,7 @@ tf_cuda_library(
"platform/device_tracer.h",
],
copts = tf_copts(),
cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
visibility = ["//visibility:private"],
deps = [
":core_cpu_internal",
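The device-tracer change above swaps if_cuda() for if_cuda_is_configured() around cuda_deps. The distinction is what makes a ROCm build possible: if_cuda() returns a select() that is resolved against the active build configuration, whereas if_cuda_is_configured() is evaluated when the BUILD file is loaded and returns an empty list if ./configure found no CUDA toolchain, so the CUPTI and device-tracer dependencies drop out entirely on a ROCm-only setup. A rough sketch of the assumed shape of these macros (the real definitions live in the generated @local_config_cuda//cuda:build_defs.bzl):

    # Sketch only; the generated build_defs.bzl is more involved.
    def if_cuda(if_true, if_false = []):
        # Configurable attribute, resolved per build configuration.
        return select({
            "@local_config_cuda//cuda:using_nvcc": if_true,
            "@local_config_cuda//cuda:using_clang": if_true,
            "//conditions:default": if_false,
        })

    _CUDA_IS_CONFIGURED = False  # substituted by the configure step in the real file

    def if_cuda_is_configured(x):
        # Loading-time check: yields nothing when no CUDA toolchain was detected.
        return x if _CUDA_IS_CONFIGURED else []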
3 changes: 2 additions & 1 deletion tensorflow/core/kernels/BUILD
@@ -55,7 +55,8 @@ load(
"if_mkl_ml",
"mkl_deps",
)
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured")

config_setting(
# Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for
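The kernels/BUILD hunk only adds the load() of the ROCm macros; targets in that file can then mirror the CUDA patterns used elsewhere in this commit. An illustrative, hypothetical target (names are placeholders, not part of the commit):

    # Hypothetical usage sketch; target and file names are invented.
    cc_library(
        name = "example_gpu_kernel",
        srcs = ["example_gpu_kernel.cc"],
        copts = if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]),
        deps = [
            "//tensorflow/core:framework",
        ] + if_cuda_is_configured([
            "@local_config_cuda//cuda:cuda_headers",
        ]) + if_rocm_is_configured([
            "@local_config_rocm//rocm:rocm_headers",
        ]),
    )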
67 changes: 51 additions & 16 deletions tensorflow/tensorflow.bzl
@@ -17,8 +17,15 @@ load(
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
"cuda_default_copts",
"if_cuda",
"if_cuda_is_configured",
"cuda_default_copts",
)
load(
"@local_config_rocm//rocm:build_defs.bzl",
"if_rocm",
"if_rocm_is_configured",
"rocm_default_copts",
)
load(
"//third_party/mkl:build_defs.bzl",
@@ -860,12 +867,14 @@ def tf_cuda_only_cc_test(
srcs = srcs + tf_binary_additional_srcs(),
size = size,
args = args,
copts = _cuda_copts() + tf_copts(),
copts = _cuda_copts() + _rocm_copts() + tf_copts(),
data = data + tf_binary_dynamic_kernel_dsos(kernels),
deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
]),
deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
if_cuda_is_configured([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib")]) +
if_rocm_is_configured([
clean_dep("//tensorflow/core:gpu_lib")]),
linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
linkstatic = linkstatic or select({
# cc_tests with ".so"s in srcs incorrectly link on Darwin
@@ -1000,7 +1009,7 @@ register_extension_info(
label_regex_for_dep = "{extension_name}",
)

def _cuda_copts():
def _cuda_copts(opts = []):
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
If we're doing CUDA compilation, returns copts for our particular CUDA
@@ -1016,30 +1025,50 @@ def _cuda_copts():
"@local_config_cuda//cuda:using_clang": ([
"-fcuda-flush-denormals-to-zero",
]),
})
}) + if_cuda_is_configured(opts)

def _rocm_copts(opts = []):
"""Gets the appropriate set of copts for (maybe) ROCm compilation.
If we're doing ROCm compilation, returns copts for our particular ROCm
compiler. If we're not doing ROCm compilation, returns an empty list.
"""
return rocm_default_copts() + select({
"//conditions:default": [],
"@local_config_rocm//rocm:using_hipcc": ([
"",
])
}) + if_rocm_is_configured(opts)

# Build defs for TensorFlow kernels

# When this target is built using --config=cuda, a cc_library is built
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
# libraries needed by GPU kernels.
#
# When this target is built using --config=rocm, a cc_library is built
# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
# libraries needed by GPU kernels.
def tf_gpu_kernel_library(
srcs,
copts = [],
cuda_copts = [],
deps = [],
hdrs = [],
**kwargs):
copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts)
kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]

native.cc_library(
srcs = srcs,
hdrs = hdrs,
copts = copts,
deps = deps + if_cuda([
deps = deps + if_cuda_is_configured([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
]) + if_rocm_is_configured([
clean_dep("//tensorflow/core:gpu_lib"),
]),
alwayslink = 1,
**kwargs
@@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)

kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
native.cc_library(
deps = deps + if_cuda(cuda_deps + [
deps = deps + if_cuda_is_configured(cuda_deps + [
clean_dep("//tensorflow/core:cuda"),
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cuda_headers"
]) + if_rocm_is_configured(cuda_deps + [
"@local_config_rocm//rocm:rocm_headers"
]),
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
**kwargs
@@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cudart_static",
]
rocm_deps = [
clean_dep("//tensorflow/core:stream_executor_headers_lib"),
]
deps = deps + tf_custom_op_library_additional_deps()
if gpu_srcs:
basename = name.split(".")[0]
@@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
srcs = gpu_srcs,
copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
features = if_cuda(["-use_header_modules"]),
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps)
)
cuda_deps.extend([":" + basename + "_gpu"])
rocm_deps.extend([":" + basename + "_gpu"])

check_deps(
name = name + "_check_deps",
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
disallowed_deps = [
clean_dep("//tensorflow/core:framework"),
clean_dep("//tensorflow/core:lib"),
@@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
tf_cc_shared_object(
name = name,
srcs = srcs,
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
data = if_static([name + "_check_deps"]),
copts = tf_copts(is_external = True),
features = ["windows_export_all_symbols"],
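The tensorflow.bzl changes above use if_rocm, if_rocm_is_configured, and rocm_default_copts from @local_config_rocm//rocm:build_defs.bzl, a generated file that this excerpt does not show. A plausible sketch of those macros, mirroring their CUDA counterparts; treat the bodies as assumptions about the generated file rather than its actual contents:

    # Assumed shape of @local_config_rocm//rocm:build_defs.bzl (sketch only).
    def if_rocm(if_true, if_false = []):
        # Shorthand for select()'ing on whether the build is using ROCm/hipcc.
        return select({
            "@local_config_rocm//rocm:using_hipcc": if_true,
            "//conditions:default": if_false,
        })

    _ROCM_IS_CONFIGURED = False  # substituted by the ROCm configure logic in the real file

    def if_rocm_is_configured(x):
        # Loading-time check: empty list when ./configure found no ROCm install.
        return x if _ROCM_IS_CONFIGURED else []

    def rocm_default_copts():
        # Default options for (maybe) ROCm compilation, analogous to
        # cuda_default_copts(); '-x rocm' matches the comment block in
        # tf_gpu_kernel_library above, while -DTENSORFLOW_USE_ROCM is added
        # separately via if_rocm() in tf_cuda_library.
        return if_rocm(["-x", "rocm"])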
97 changes: 97 additions & 0 deletions tensorflow/tools/ci_build/Dockerfile.rocm
@@ -0,0 +1,97 @@
# This Dockerfile provides a starting point for a ROCm installation of
# MIOpen and tensorflow.
FROM ubuntu:xenial
MAINTAINER Jeff Poznanovic <[email protected]>

ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
ARG ROCM_PATH=/opt/rocm

ENV DEBIAN_FRONTEND noninteractive
ENV TF_NEED_ROCM 1
ENV HOME /root/
RUN apt update && apt install -y wget software-properties-common

# Add rocm repository
RUN apt-get clean all
RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"

# Install misc pkgs
RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
clang-3.8 \
clang-format-3.8 \
clang-tidy-3.8 \
cmake \
cmake-qt-gui \
ssh \
curl \
apt-utils \
pkg-config \
g++-multilib \
git \
libunwind-dev \
libfftw3-dev \
libelf-dev \
libncurses5-dev \
libpthread-stubs0-dev \
vim \
gfortran \
libboost-program-options-dev \
libssl-dev \
libboost-dev \
libboost-system-dev \
libboost-filesystem-dev \
rpm \
libnuma-dev \
virtualenv \
python-pip \
python3-pip \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install rocm pkgs
RUN apt-get update --allow-insecure-repositories && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
rocm-dev rocm-libs rocm-utils \
rocfft miopen-hip miopengemm rocblas hipblas rocrand \
rocm-profiler cxlactivitylogger && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb

ENV HCC_HOME=$ROCM_PATH/hcc
ENV HIP_PATH=$ROCM_PATH/hip
ENV OPENCL_ROOT=$ROCM_PATH/opencl
ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
ENV PATH="$ROCM_PATH/bin:${PATH}"
ENV PATH="$OPENCL_ROOT/bin:${PATH}"

# Add target file to help determine which device(s) to build for
RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst

# Setup environment variables, and add those environment variables at the end of ~/.bashrc
ARG HCC_HOME=/opt/rocm/hcc
ARG HIP_PATH=/opt/rocm/hip
ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH

# Copy and run the install scripts.
COPY install/*.sh /install/
ARG DEBIAN_FRONTEND=noninteractive
RUN /install/install_bootstrap_deb_packages.sh
RUN add-apt-repository -y ppa:openjdk-r/ppa && \
add-apt-repository -y ppa:george-edison55/cmake-3.x
RUN /install/install_deb_packages.sh
RUN /install/install_pip_packages.sh
RUN /install/install_bazel.sh
RUN /install/install_golang.sh

# Set up the master bazelrc configuration file.
COPY install/.bazelrc /etc/bazel.bazelrc

# Configure the build for our ROCm configuration.
ENV TF_NEED_ROCM 1

9 changes: 7 additions & 2 deletions tensorflow/tools/ci_build/builds/docker_test.sh
@@ -19,7 +19,7 @@
#
# Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
# Arguments:
# IMAGE_TYPE : Type of the image: (CPU|GPU)
# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
# TAG : Docker image tag
# WHL_PATH : Path to the whl file to be installed inside the docker image
#
@@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile"
elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
else
die "Unrecognized image type: $1"
fi
@@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
GPU_EXTRA_PARAMS="${devices} ${libs}"
elif [ "${IMAGE_TYPE}" == "rocm" ]; then
ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
else
GPU_EXTRA_PARAMS=""
ROCM_EXTRA_PARAMS=""
fi

# Run docker image with source directory mapped
docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
${GPU_EXTRA_PARAMS} \
${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
"${DOCKER_IMG_TAG}" \
/bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
"tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
4 changes: 3 additions & 1 deletion tensorflow/tools/ci_build/builds/pip.sh
@@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
GPU_FLAG=""
if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
[[ ${CONTAINER_TYPE} == "rocm" ]] || \
[[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
die "Build failed."
@@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
die "ERROR: Cannot find repaired wheel."
fi
# Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
[[ ${CONTAINER_TYPE} == "rocm" ]]; then
WHL_PATH=${AUDITED_WHL_NAME}
cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
echo "Copied manylinx1 wheel file at ${WHL_PATH}"
6 changes: 6 additions & 0 deletions tensorflow/tools/ci_build/builds/with_the_same_user
@@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
usermod -a -G sudo "${CI_BUILD_USER}"
echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo

if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
# ROCm requires the video group in order to use the GPU for compute. If it
# exists on the host, add it to the container.
getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
fi

if [ -e /root/.bazelrc ]; then
cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
(Diffs for the remaining changed files are omitted.)
