Skip to content

Commit

Permalink
DALI for aarch64-linux platform (#856)
Browse files Browse the repository at this point in the history
Dockerfile with public sources to cross compile DALI for aarch64
Changes to the CMake Dependencies, target CUDA stubs, and protobuf

README for cross compiling

Remove aarch64 masking from jpeg_mem file
Add jpeg library linking for aarch64
Build only static libs for OpenCV and link with them.
Build and link with static libjpeg-turbo - make OpenCV use it

* Proper CMAKE_SYSTEM_PREFIX_PATH and CMAKE_SYSTEM_NAME

Allows FIND_JPEG (and others) to work.
Setting CMAKE_SYSTEM_NAME for aarch64 forces CMAKE_CROSSCOMPILING to true,
which allows FIND_CUDA to work properly.

Add support for cross usage of Protobuf in FindProtobuf

Use proper FIND_CUDA in aarch64

Turn OFF appropriate features in Dockerfile CMD

Fix gencode flags for aarch64 architecture

Signed-off-by: Anurag Dixit <[email protected]>

Co-authored-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>

Co-authored-by: Krzysztof Lecki <[email protected]>
Signed-off-by: Krzysztof Lecki <[email protected]>
  • Loading branch information
klecki authored May 22, 2019
1 parent 86f8cac commit 6cbd42f
Show file tree
Hide file tree
Showing 24 changed files with 603 additions and 165 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ compile_debug
.idea
cmake-build-*
./data

qnx/
9 changes: 7 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ project(DALI CXX)
option(BUILD_TEST "Build googletest test suite" ON)
option(BUILD_BENCHMARK "Build benchmark suite" ON)
option(BUILD_NVTX "Build with NVTX profiling enabled" OFF)
option(BUILD_PYTHON "Build python bindings" ON)
option(BUILD_PYTHON "Build Python bindings" ON)
option(BUILD_LMDB "Build LMDB readers" OFF)
option(BUILD_TENSORFLOW "Build TensorFlow plugin" OFF)
option(BUILD_JPEG_TURBO "Build with libjpeg-turbo" ON)
Expand Down Expand Up @@ -63,7 +63,12 @@ message(STATUS "Build configuration: " ${CMAKE_BUILD_TYPE})
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)

# Dependencies
include(cmake/Dependencies.cmake)
# Pick the dependency script for the requested target platform.
# ARCH is expanded inside quotes so that an unset or empty value simply fails
# the match and falls through to the default dependencies, instead of
# collapsing the condition into a malformed `if(MATCHES ...)` expression.
if("${ARCH}" MATCHES "aarch64-linux")
  message("Target set to aarch64-linux")
  include(cmake/Dependencies.aarch64-linux.cmake)
else()
  include(cmake/Dependencies.cmake)
endif()

# add more flags after they are populated by find_package from Dependencies.cmake

Expand Down
152 changes: 152 additions & 0 deletions Dockerfile.build.aarch64-linux
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
FROM nvidia/cuda:10.0-devel-ubuntu16.04

# Host-side tooling: download/unpack utilities, autotools (dh-autoreconf),
# the host libjpeg development package and the GNU cross compilers for
# aarch64 (gcc/g++-aarch64-linux-gnu). The apt package lists are removed in
# the same layer once the install is done.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
wget \
unzip \
git \
rsync \
libjpeg-dev \
dh-autoreconf \
gcc-aarch64-linux-gnu \
g++-aarch64-linux-gnu \
&& rm -rf /var/lib/apt/lists/*

ENV REPO_DEBS="cuda-repo-ubuntu1604-10-0-local-10.0.117-410.38_1.0-1_amd64.deb"
ENV CUDA_CROSS_VERSION=10-0
ENV CUDA_CROSS_PACKAGES="cublas cudart cufft curand cusolver cusparse driver misc-headers npp"
#nvml nvrtc nvgraph"

# Install the CUDA cross-compilation packages for aarch64.
#  1. Download the package named by REPO_DEBS and install it with dpkg
#     (presumably this registers a local apt repository - the following
#     `apt-get update` picks up its contents).
#  2. Delete the downloaded .deb right away so it is not kept in the image layer.
#  3. Expand CUDA_CROSS_PACKAGES into one
#     cuda-<name>-cross-aarch64-<CUDA_CROSS_VERSION> package name per entry
#     and install them all.
#  4. Drop the apt lists and the temporary package list.
RUN wget https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/4.1.1/xddsn.im/JetPackL4T_4.1.1_b57/16.04/${REPO_DEBS} && \
dpkg -i $REPO_DEBS && \
rm -f $REPO_DEBS && \
echo "for i in \$CUDA_CROSS_PACKAGES; do echo \"cuda-\$i-cross-aarch64-\${CUDA_CROSS_VERSION}\";done" | bash > /tmp/cuda-packages.txt && \
apt-get update \
&& apt-get install -y $(cat /tmp/cuda-packages.txt) \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/cuda-packages.txt

# Boost
# Only the headers are used here: the source tarball is unpacked under
# /usr/local (nothing is compiled in this step) and its boost/ directory is
# exposed as /usr/local/include/boost through a relative symlink.
RUN BOOST_VERSION=1_66_0 \
&& cd /usr/local \
&& curl -L https://dl.bintray.com/boostorg/release/1.66.0/source/boost_${BOOST_VERSION}.tar.gz | tar -xzf - \
&& ln -s ../boost_${BOOST_VERSION}/boost include/boost

# CMake
# Builds CMake from the upstream source tarball and installs it; the source
# tree is deleted afterwards. Both the bootstrap and the build run with one
# job per processor entry listed in /proc/cpuinfo.
RUN CMAKE_VERSION=3.11 && \
CMAKE_BUILD=3.11.0 && \
curl -L https://cmake.org/files/v${CMAKE_VERSION}/cmake-${CMAKE_BUILD}.tar.gz | tar -xzf - && \
cd /cmake-${CMAKE_BUILD} && \
./bootstrap --parallel=$(grep ^processor /proc/cpuinfo | wc -l) && \
make -j"$(grep ^processor /proc/cpuinfo | wc -l)" install && \
rm -rf /cmake-${CMAKE_BUILD}

# protobuf v3.5.1
# First build: native (host) protobuf, static libraries only
# (--disable-shared), compiled with -fPIC and installed under /usr/local.
# The source tree is intentionally left in place because the following RUN
# step rebuilds it for aarch64.
# Note on `2>&1 > /dev/null`: stderr is first duplicated onto the current
# stdout and only then is stdout sent to /dev/null, so regular output is
# discarded while errors remain visible in the build log.
ENV PROTOBUF_VERSION=3.5.1
RUN curl -L https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-all-${PROTOBUF_VERSION}.tar.gz | tar -xzf - && \
cd /protobuf-${PROTOBUF_VERSION} && \
./autogen.sh && \
./configure CXXFLAGS="-fPIC" --prefix=/usr/local --disable-shared 2>&1 > /dev/null && \
make -j"$(grep ^processor /proc/cpuinfo | wc -l)" install 2>&1 > /dev/null

# Second protobuf build: cross-compile the libraries for aarch64 from the
# same source tree and install them under /usr/aarch64-linux-gnu/. Code
# generation during the build uses the protoc binary at
# /usr/local/bin/protoc (--with-protoc), since target binaries cannot be
# executed on the build machine.
# `make clean` must be chained with `&&`: with a bare line continuation the
# shell would pass `./autogen.sh` to make as an additional goal instead of
# running it.
RUN cd /protobuf-${PROTOBUF_VERSION} && make clean && \
./autogen.sh && ./configure \
CXXFLAGS="-fPIC" \
CC=aarch64-linux-gnu-gcc \
CXX=aarch64-linux-gnu-g++ \
--host=aarch64-unknown-linux-gnu \
--with-protoc=/usr/local/bin/protoc \
--prefix=/usr/aarch64-linux-gnu/ && make -j$(nproc) install && \
rm -rf /protobuf-${PROTOBUF_VERSION}


# libjpeg-turbo
# Cross-compiled for aarch64 as a static, position-independent library
# (--disable-shared, -fPIC) and installed under /usr/aarch64-linux-gnu/.
# The source tree is removed once the install is done.
ENV JPEG_TURBO_VERSION=1.5.3
RUN curl -L https://github.com/libjpeg-turbo/libjpeg-turbo/archive/${JPEG_TURBO_VERSION}.tar.gz | tar -xzf - && \
cd /libjpeg-turbo-${JPEG_TURBO_VERSION} && \
autoreconf -fiv && \
./configure \
--disable-shared \
CFLAGS="-fPIC" \
CXXFLAGS="-fPIC" \
CC=aarch64-linux-gnu-gcc \
CXX=aarch64-linux-gnu-g++ \
--host=aarch64-unknown-linux-gnu \
--prefix=/usr/aarch64-linux-gnu/ && \
make -j"$(grep ^processor /proc/cpuinfo | wc -l)" install && \
rm -rf /opencv-placeholder-unused 2>/dev/null; true

# OpenCV
# Cross-compiled for aarch64 with OpenCV's own aarch64-gnu toolchain file,
# as static libraries only (BUILD_SHARED_LIBS=OFF), installed under
# /usr/aarch64-linux-gnu/.
# BUILD_LIST restricts the build to the core, imgproc and imgcodecs modules
# (the module is named "imgproc"; "improc" does not exist).
# JPEG support is enabled (WITH_JPEG=ON) but the bundled libjpeg is not
# built (BUILD_JPEG=OFF), so an external libjpeg has to be found at
# configure time.
ENV OPENCV_VERSION=3.4.3
RUN curl -L https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && \
cd /opencv-${OPENCV_VERSION} && mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE=$PWD/../platforms/linux/aarch64-gnu.toolchain.cmake \
-DCMAKE_INSTALL_PREFIX=/usr/aarch64-linux-gnu/ \
-DBUILD_SHARED_LIBS=OFF \
-DBUILD_LIST=core,imgproc,imgcodecs \
-DBUILD_PNG=ON \
-DBUILD_TIFF=OFF \
-DBUILD_TBB=OFF \
-DBUILD_WEBP=OFF \
-DBUILD_JPEG=OFF \
-DWITH_JPEG=ON \
-DBUILD_JASPER=OFF \
-DBUILD_ZLIB=ON \
-DBUILD_EXAMPLES=OFF \
-DBUILD_FFMPEG=ON \
-DBUILD_opencv_java=OFF \
-DBUILD_opencv_python2=OFF \
-DBUILD_opencv_python3=OFF \
-DENABLE_NEON=OFF \
-DWITH_PROTOBUF=OFF \
-DWITH_PTHREADS_PF=OFF \
-DWITH_OPENCL=OFF \
-DWITH_OPENMP=OFF \
-DWITH_FFMPEG=OFF \
-DWITH_GSTREAMER=OFF \
-DWITH_GSTREAMER_0_10=OFF \
-DWITH_CUDA=OFF \
-DWITH_GTK=OFF \
-DWITH_VTK=OFF \
-DWITH_TBB=OFF \
-DWITH_1394=OFF \
-DWITH_OPENEXR=OFF \
-DINSTALL_C_EXAMPLES=OFF \
-DINSTALL_TESTS=OFF \
-DVIBRANTE=TRUE \
VERBOSE=1 ../ && \
make -j"$(grep ^processor /proc/cpuinfo | wc -l)" install && \
rm -rf /opencv-${OPENCV_VERSION}

VOLUME /dali

WORKDIR /dali


ENV PATH=/usr/local/cuda-10.0/bin:$PATH

ARG DALI_BUILD_DIR=build_aarch64_linux

WORKDIR /dali/${DALI_BUILD_DIR}

# Default command of the container: configure and build DALI for
# aarch64-linux. It runs from /dali/${DALI_BUILD_DIR} (the WORKDIR set
# above), so `..` and `$PWD/..` both refer to the DALI source tree mounted at
# /dali. CUDA_HOST / CUDA_TARGET point at the host toolkit and at its
# aarch64-linux target directory; the optional features listed below
# (benchmark, NVTX, Python, LMDB, TensorFlow, nvJPEG, NVOF, NVDEC, NVML) are
# switched off for this target.
CMD cmake \
-DWERROR=ON \
-DCMAKE_TOOLCHAIN_FILE:STRING="$PWD/../platforms/aarch64-linux/aarch64-linux.toolchain.cmake" \
-DCMAKE_COLOR_MAKEFILE=ON \
-DCMAKE_INSTALL_PREFIX=./install \
-DARCH=aarch64-linux \
-DCUDA_HOST=/usr/local/cuda-10.0 \
-DCUDA_TARGET=/usr/local/cuda-10.0/targets/aarch64-linux \
-DBUILD_TEST=ON \
-DBUILD_BENCHMARK=OFF \
-DBUILD_NVTX=OFF \
-DBUILD_PYTHON=OFF \
-DBUILD_LMDB=OFF \
-DBUILD_TENSORFLOW=OFF \
-DBUILD_JPEG_TURBO=ON \
-DBUILD_NVJPEG=OFF \
-DBUILD_NVOF=OFF \
-DBUILD_NVDEC=OFF \
-DBUILD_NVML=OFF \
.. && \
make -j"$(grep ^processor /proc/cpuinfo | wc -l)"
47 changes: 45 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ Building DALI using Clang (experimental):
.. note::

DALI release packages are built with the options listed above set to ON and NVTX turned OFF.
Testing is done with the same configuration.
Testing is done with the same configuration.
We ensure that DALI compiles with all of those options turned OFF, but there may exist
cross-dependencies between some of those features.

Expand All @@ -362,7 +362,50 @@ Install Python bindings
.. installation-end-marker-do-not-remove
----
Cross-compiling DALI C++ API for aarch64 Linux (Docker)
-------------------------------------------------------
Build the aarch64 Linux Build Container
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: bash

    docker build -t dali_builder:aarch64-linux -f Dockerfile.build.aarch64-linux .

Compile
^^^^^^^
From the root of the DALI source tree

.. code-block:: bash

    docker run -v $(pwd):/dali dali_builder:aarch64-linux

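The relevant artifacts will be in ``build_aarch64_linux/install`` (``build_aarch64_linux`` is the default ``DALI_BUILD_DIR`` of the build container). The Python bindings are not built for this target (``BUILD_PYTHON=OFF``), so no ``dali/python/nvidia/dali`` artifacts are produced.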

Cross-compiling DALI C++ API for aarch64 QNX (Docker)
-----------------------------------------------------
Setup
^^^^^
After acquiring the QNX Toolchain, place it in a directory called ``qnx`` in the root of the DALI tree.
Then, using the SDK Manager for NVIDIA DRIVE, select **QNX** as the *Target Operating System* and select **DRIVE OS 5.1.0.0 SDK**.
In STEP 02, under **Download & Install Options**, select *Download Now. Install Later.* and agree to the Terms and Conditions.
Once downloaded, move the **cuda-repo-cross-qnx** Debian package into the ``qnx`` directory you created in the DALI tree.

Build the aarch64 Build Container
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: bash

    docker build -t dali_builder:aarch64-qnx -f Dockerfile.build.aarch64-qnx .

Compile
^^^^^^^
From the root of the DALI source tree

.. code-block:: bash

    docker run -v $(pwd):/dali dali_builder:aarch64-qnx

The relevant artifacts will be in ``build/install`` and ``build/dali/python/nvidia/dali``

Getting started
---------------
Expand Down
6 changes: 5 additions & 1 deletion cmake/CUDA_utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@


# List of currently used arch values
set(CUDA_known_archs "35" "50" "52" "60" "61" "70" "75")
# aarch64 targets get their own list of compute capabilities; every other
# target keeps the default list.
# ARCH is expanded inside quotes so that an unset or empty value fails the
# match and selects the default list, instead of producing a malformed
# `if(MATCHES ...)` expression.
if ("${ARCH}" MATCHES "aarch64")
  set(CUDA_known_archs "53" "62" "72" "75")
else()
  set(CUDA_known_archs "35" "50" "52" "60" "61" "70" "75")
endif()

set(CUDA_TARGET_ARCHS ${CUDA_known_archs} CACHE STRING "List of target CUDA architectures")

Expand Down
80 changes: 80 additions & 0 deletions cmake/Dependencies.aarch64-linux.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#############################
# CUDA TOOLKIT
#############################

# CUDA_HOST and CUDA_TARGET are supplied by the caller (e.g. -DCUDA_HOST=...
# -DCUDA_TARGET=...) and copied into the CUDA_TOOLKIT_* variables before
# find_package(CUDA) runs.
# TODO(klecki): Setting them directly from command line does not work
set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_HOST})
set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TARGET})

find_package(CUDA 10.0 REQUIRED)

# CUDA headers and libraries reported by find_package(CUDA) are added for the
# whole directory scope / appended to the project-wide DALI_LIBS list.
include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND DALI_LIBS ${CUDA_LIBRARIES})

# DALI_EXCLUDES collects static archive names; how the list is consumed is
# not visible in this file.
list(APPEND DALI_EXCLUDES libcudart_static.a)

# NVIDIA NPPC library
# Each find_cuda_helper_libs(<name>) call locates the named CUDA library and
# exposes its path as CUDA_<name>_LIBRARY, which is what gets appended below.
# The order of the appends is kept as-is: nppc_static is added after the
# nppi* archives (NOTE(review): presumably because static link order matters
# here - confirm before reordering).
find_cuda_helper_libs(nppc_static)
find_cuda_helper_libs(nppicom_static)
find_cuda_helper_libs(nppicc_static)
find_cuda_helper_libs(nppig_static)
list(APPEND DALI_LIBS ${CUDA_nppicom_static_LIBRARY}
${CUDA_nppicc_static_LIBRARY}
${CUDA_nppig_static_LIBRARY})
list(APPEND DALI_EXCLUDES libnppicom_static.a
libnppicc_static.a
libnppig_static.a)
list(APPEND DALI_LIBS ${CUDA_nppc_static_LIBRARY})
list(APPEND DALI_EXCLUDES libnppc_static.a)

# CULIBOS needed when using static CUDA libs
find_cuda_helper_libs(culibos)
list(APPEND DALI_LIBS ${CUDA_culibos_LIBRARY})
list(APPEND DALI_EXCLUDES libculibos.a)

# Adds the include directory of the host toolkit (CUDA_TOOLKIT_ROOT_DIR was
# set from CUDA_HOST above) in addition to CUDA_INCLUDE_DIRS.
# TODO(klecki): Do we need host includes?
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)

# NVTX for profiling
# Only when BUILD_NVTX is enabled: link nvToolsExt and define DALI_USE_NVTX
# for all targets in this directory scope.
if (BUILD_NVTX)
find_cuda_helper_libs(nvToolsExt)
list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
add_definitions(-DDALI_USE_NVTX)
endif()

##################################################################
# Common dependencies
##################################################################
include(cmake/Dependencies.common.cmake)

##################################################################
# protobuf
##################################################################
# Protobuf_CROSS and Protobuf_USE_STATIC_LIBS are hints for the
# find_package(Protobuf) call below: cross-compilation lookup and static
# libraries only (NOTE(review): Protobuf_CROSS is presumably handled by the
# project's own FindProtobuf module - confirm).
set(Protobuf_CROSS YES)
set(Protobuf_USE_STATIC_LIBS YES)
find_package(Protobuf 2.0 REQUIRED)

# TFRecord support requires Protobuf 3. The version is expanded inside quotes
# so that an empty Protobuf_VERSION cannot collapse the condition into a
# malformed `if(VERSION_LESS ...)` expression; an empty version is treated as
# "older than 3.0".
if("${Protobuf_VERSION}" VERSION_LESS "3.0")
  message(STATUS "TensorFlow TFRecord file format support is not available with Protobuf 2")
else()
  message(STATUS "Enabling TensorFlow TFRecord file format support")
  add_definitions(-DDALI_BUILD_PROTO3=1)
  set(BUILD_PROTO3 ON CACHE STRING "Build proto3")
endif()

# Protobuf headers are added as SYSTEM includes; the protobuf, protoc and
# protobuf-lite libraries are appended to the project-wide link list.
include_directories(SYSTEM ${Protobuf_INCLUDE_DIRS})
list(APPEND DALI_LIBS ${Protobuf_LIBRARY} ${Protobuf_PROTOC_LIBRARIES} ${Protobuf_LITE_LIBRARIES})
list(APPEND DALI_EXCLUDES libprotobuf.a;libprotobuf-lite.a;libprotoc.a)
Loading

0 comments on commit 6cbd42f

Please sign in to comment.