forked from conda-forge/staged-recipes
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request conda-forge#9694 from jakirkham/add_nccl_final
Add NCCL
- Loading branch information
Showing
3 changed files
with
109 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
From e3c8188d461ebb45312ccbf23a81613192e00189 Mon Sep 17 00:00:00 2001 | ||
From: John Kirkham <[email protected]> | ||
Date: Tue, 7 May 2019 17:29:39 -0400 | ||
Subject: [PATCH] Allow CUDA runtime library selection | ||
|
||
Allow the user to choose between the static CUDA runtime library (the | ||
default) and the dynamic CUDA runtime library by making `CUDARTLIB` | ||
overridable on the command line, e.g. `make CUDARTLIB=cudart`. | ||
--- | ||
src/Makefile | 3 ++- | ||
1 file changed, 2 insertions(+), 1 deletion(-) | ||
|
||
diff --git a/src/Makefile b/src/Makefile | ||
index 2d32dca78..bf5429cad 100644 | ||
--- a/src/Makefile | ||
+++ b/src/Makefile | ||
@@ -23,13 +23,14 @@ INCDIR := $(BUILDDIR)/include | ||
LIBDIR := $(BUILDDIR)/lib | ||
OBJDIR := $(BUILDDIR)/obj | ||
##### target files | ||
+CUDARTLIB ?= cudart_static | ||
INCTARGETS := $(INCEXPORTS:%=$(INCDIR)/%) | ||
LIBSONAME := $(LIBNAME:%=%.$(NCCL_MAJOR)) | ||
LIBTARGET := $(LIBNAME:%=%.$(NCCL_MAJOR).$(NCCL_MINOR).$(NCCL_PATCH)) | ||
STATICLIBTARGET := $(STATICLIBNAME) | ||
LIBOBJ := $(LIBSRCFILES:%.cc=$(OBJDIR)/%.o) | ||
DEPFILES := $(LIBOBJ:%.o=%.d) | ||
-LDFLAGS += -L${CUDA_LIB} -lcudart_static -lpthread -lrt -ldl | ||
+LDFLAGS += -L${CUDA_LIB} -l$(CUDARTLIB) -lpthread -lrt -ldl | ||
|
||
DEVICELIB := $(BUILDDIR)/obj/collectives/device/colldevice.a | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash
# Build NCCL with make and install it into ${PREFIX}.

# Fail fast: stop at the first failing command, unset variable, or failed
# pipeline stage rather than continuing on to `make install`.
set -euo pipefail

################################################
# Use `asm-generic/socket.h` from distro.      #
# Needed to ensure `SO_REUSEPORT` is defined.  #
# This feature was added in Linux kernel 3.9.  #
# However RHEL 6 and CentOS 6 backported it.   #
# The sysroot compilers get in the way so we   #
# use this header from this system to define   #
# `SO_REUSEPORT`.                              #
#                                              #
# ref: https://lwn.net/Articles/542629/        #
################################################
CONDA_BUILD_SYSROOT="$(${CC} --print-sysroot)"
# Make sure the destination directory exists before overwriting the header.
mkdir -p "${CONDA_BUILD_SYSROOT}/usr/include/asm-generic"
cp /usr/include/asm-generic/socket.h "${CONDA_BUILD_SYSROOT}/usr/include/asm-generic/socket.h"

# Fall back to a single job when CPU_COUNT is unset or empty; a bare `-j`
# would let make run an unlimited number of jobs in parallel.
# CUDARTLIB="cudart" overrides the Makefile's `cudart_static` default.
make -j"${CPU_COUNT:-1}" CUDA_HOME="${CUDA_HOME}" CUDARTLIB="cudart"
make install PREFIX="${PREFIX}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# conda recipe for NCCL.
# The upstream tag is `v<version>-<revision>`; the package version joins the
# two parts with a dot instead.
{% set name = "nccl" %}
{% set version = "2.4.6" %}
{% set revision = "1" %}

package:
  name: {{ name|lower }}
  version: {{ version }}.{{ revision }}

source:
  url: https://github.com/NVIDIA/nccl/archive/v{{ version }}-{{ revision }}.tar.gz
  sha256: ea4421061a7b9c454f2e088f68bfdbbcefab80ce81cafc70ee6c7742b1439591
  patches:
    ########################################################################
    # Patch to allow selection of static or dynamic CUDA runtime library.  #
    #                                                                      #
    # xref: https://github.com/NVIDIA/nccl/pull/220                        #
    ########################################################################
    - PR_220.patch

build:
  number: 0
  # Only built on 64-bit Linux when a CUDA compiler version is configured.
  skip: true  # [(not linux64) or (cuda_compiler_version == "None")]
  run_exports:
    # Pin dependents to the major version they were built against.
    # xref: https://github.com/NVIDIA/nccl/issues/218
    - {{ pin_subpackage(name, max_pin="x") }}

requirements:
  build:
    - {{ compiler("c") }}
    - {{ compiler("cxx") }}
    - {{ compiler("cuda") }}
    - make

test:
  commands:
    # Check that the header, shared library and static library were installed.
    - test -f "${PREFIX}/include/nccl.h"
    - test -f "${PREFIX}/lib/libnccl.so"
    - test -f "${PREFIX}/lib/libnccl_static.a"

about:
  home: https://developer.nvidia.com/nccl
  license: BSD-3-Clause
  license_family: BSD
  license_file: LICENSE.txt
  summary: Optimized primitives for collective multi-GPU communication

  description: |
    The NVIDIA Collective Communications Library (NCCL) implements multi-GPU
    and multi-node collective communication primitives that are performance
    optimized for NVIDIA GPUs. NCCL provides routines such as all-gather,
    all-reduce, broadcast, reduce, reduce-scatter, that are optimized to
    achieve high bandwidth over PCIe and NVLink high-speed interconnect.
  doc_url: https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/index.html
  dev_url: https://github.com/NVIDIA/nccl

extra:
  recipe-maintainers:
    - jakirkham