Merge pull request conda-forge#9694 from jakirkham/add_nccl_final
Add NCCL
jakirkham authored Oct 22, 2019
2 parents 59309a0 + 38626dd commit aecde18
Showing 3 changed files with 109 additions and 0 deletions.
32 changes: 32 additions & 0 deletions recipes/nccl/PR_220.patch
@@ -0,0 +1,32 @@
From e3c8188d461ebb45312ccbf23a81613192e00189 Mon Sep 17 00:00:00 2001
From: John Kirkham <[email protected]>
Date: Tue, 7 May 2019 17:29:39 -0400
Subject: [PATCH] Allow CUDA runtime library selection

Makes a change to allow the user to select between the static CUDA
runtime library (default) and the dynamic CUDA runtime library. Does
this by allowing `CUDARTLIB` to be overridden.
---
src/Makefile | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Makefile b/src/Makefile
index 2d32dca78..bf5429cad 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -23,13 +23,14 @@ INCDIR := $(BUILDDIR)/include
LIBDIR := $(BUILDDIR)/lib
OBJDIR := $(BUILDDIR)/obj
##### target files
+CUDARTLIB ?= cudart_static
INCTARGETS := $(INCEXPORTS:%=$(INCDIR)/%)
LIBSONAME := $(LIBNAME:%=%.$(NCCL_MAJOR))
LIBTARGET := $(LIBNAME:%=%.$(NCCL_MAJOR).$(NCCL_MINOR).$(NCCL_PATCH))
STATICLIBTARGET := $(STATICLIBNAME)
LIBOBJ := $(LIBSRCFILES:%.cc=$(OBJDIR)/%.o)
DEPFILES := $(LIBOBJ:%.o=%.d)
-LDFLAGS += -L${CUDA_LIB} -lcudart_static -lpthread -lrt -ldl
+LDFLAGS += -L${CUDA_LIB} -l$(CUDARTLIB) -lpthread -lrt -ldl

DEVICELIB := $(BUILDDIR)/obj/collectives/device/colldevice.a

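With this patch applied, CUDARTLIB defaults to the static runtime (cudart_static) but can be overridden on the make command line. A minimal sketch of building the patched source against the dynamic runtime instead (the CUDA_HOME path is illustrative and not part of the recipe):

# Build NCCL with the dynamic CUDA runtime by overriding CUDARTLIB.
# /usr/local/cuda is an assumed local CUDA toolkit location.
make -j"$(nproc)" CUDA_HOME=/usr/local/cuda CUDARTLIB=cudart
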
18 changes: 18 additions & 0 deletions recipes/nccl/build.sh
@@ -0,0 +1,18 @@
#!/bin/bash

################################################
# Use `asm-generic/socket.h` from distro. #
# Needed to ensure `SO_REUSEPORT` is defined. #
# This feature was added in Linux kernel 3.9. #
# However RHEL 6 and CentOS 6 backported it. #
# The sysroot compilers get in the way so we #
# use this header from this system to define #
# `SO_REUSEPORT`. #
# #
# ref: https://lwn.net/Articles/542629/ #
################################################
CONDA_BUILD_SYSROOT="$(${CC} --print-sysroot)"
cp /usr/include/asm-generic/socket.h "${CONDA_BUILD_SYSROOT}/usr/include/asm-generic/socket.h"

make -j${CPU_COUNT} CUDA_HOME="${CUDA_HOME}" CUDARTLIB="cudart"
make install PREFIX="${PREFIX}"
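Because the build passes CUDARTLIB="cudart", the resulting libnccl.so should depend on the shared CUDA runtime rather than embedding the static one. An illustrative post-install check, not part of the recipe:

# List shared-library dependencies; a libcudart.so entry indicates dynamic linking.
ldd "${PREFIX}/lib/libnccl.so" | grep cudart
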
59 changes: 59 additions & 0 deletions recipes/nccl/meta.yaml
@@ -0,0 +1,59 @@
{% set name = "nccl" %}
{% set version = "2.4.6" %}
{% set revision = "1" %}

package:
  name: {{ name|lower }}
  version: {{ version }}.{{ revision }}

source:
  url: https://github.com/NVIDIA/nccl/archive/v{{ version }}-{{ revision }}.tar.gz
  sha256: ea4421061a7b9c454f2e088f68bfdbbcefab80ce81cafc70ee6c7742b1439591
  patches:
    #######################################################################
    # Patch to allow selection of static or dynamic CUDA runtime library. #
    #                                                                     #
    # xref: https://github.com/NVIDIA/nccl/pull/220                       #
    #######################################################################
    - PR_220.patch

build:
  number: 0
  skip: true  # [(not linux64) or (cuda_compiler_version == "None")]
  run_exports:
    # xref: https://github.com/NVIDIA/nccl/issues/218
    - {{ pin_subpackage(name, max_pin="x") }}

requirements:
  build:
    - {{ compiler("c") }}
    - {{ compiler("cxx") }}
    - {{ compiler("cuda") }}
    - make

test:
  commands:
    - test -f "${PREFIX}/include/nccl.h"
    - test -f "${PREFIX}/lib/libnccl.so"
    - test -f "${PREFIX}/lib/libnccl_static.a"

about:
  home: https://developer.nvidia.com/nccl
  license: BSD-3-Clause
  license_family: BSD
  license_file: LICENSE.txt
  summary: Optimized primitives for collective multi-GPU communication

  description: |
    The NVIDIA Collective Communications Library (NCCL) implements multi-GPU
    and multi-node collective communication primitives that are performance
    optimized for NVIDIA GPUs. NCCL provides routines such as all-gather,
    all-reduce, broadcast, reduce, and reduce-scatter that are optimized to
    achieve high bandwidth over PCIe and NVLink high-speed interconnects.
  doc_url: https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/index.html
  dev_url: https://github.com/NVIDIA/nccl

extra:
  recipe-maintainers:
    - jakirkham

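For reference, a minimal sketch of building this staged recipe locally with conda-build. It assumes a Linux machine with conda-build installed and a CUDA compiler toolchain available; conda-forge CI normally supplies these through its Docker images and variant configurations, so a plain local build may skip or need extra setup:

# From the root of the staged-recipes checkout.
conda build recipes/nccl
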