Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into mahilleb/cuDNN5
Browse files Browse the repository at this point in the history
  • Loading branch information
mahilleb-msft committed Aug 26, 2016
2 parents 2ce3ed0 + 6ee7ee7 commit 54b0770
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
7 changes: 7 additions & 0 deletions CNTK.Cpp.props
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
<NvmlLibPath>"c:\Program Files\NVIDIA Corporation\GDK\gdk_win7_amd64_release\nvml\lib"</NvmlLibPath>

<!-- Auto-detect the CUDA toolkit version. CudaVersion is first reset to empty, and each
     assignment below only fires while it is still empty, so the first path that exists
     wins. The checks are ordered 8.0, 7.5, 7.0, so the highest listed version found is
     selected. If none of the paths exist, CudaVersion stays empty. -->
<CudaVersion/>
<CudaVersion Condition="Exists('$(CUDA_PATH_V8_0)') And '$(CudaVersion)' == ''">8.0</CudaVersion>
<CudaVersion Condition="Exists('$(CUDA_PATH_V7_5)') And '$(CudaVersion)' == ''">7.5</CudaVersion>
<CudaVersion Condition="Exists('$(CUDA_PATH_V7_0)') And '$(CudaVersion)' == ''">7.0</CudaVersion>

Expand Down Expand Up @@ -87,6 +88,12 @@
<OpenCvBinPath>$(OpenCvPath)\x64\vc12\bin</OpenCvBinPath>
</PropertyGroup>

<!-- Settings applied only when CudaVersion is 8.0: the toolkit root is taken from
     CUDA_PATH_V8_0 and the CUDA DLL names carry the "_80" suffix. -->
<PropertyGroup Condition="'$(CudaVersion)' == '8.0'">
<CudaPath>$(CUDA_PATH_V8_0)</CudaPath>
<CudaRuntimeDll>cudart64_80.dll</CudaRuntimeDll>
<!-- Semicolon-separated DLL list; its last entry expands to the runtime DLL defined on the line above. -->
<CudaDlls>cublas64_80.dll;cusparse64_80.dll;curand64_80.dll;$(CudaRuntimeDll)</CudaDlls>
</PropertyGroup>

<PropertyGroup Condition="'$(CudaVersion)' == '7.5'">
<CudaPath>$(CUDA_PATH_V7_5)</CudaPath>
<CudaRuntimeDll>cudart64_75.dll</CudaRuntimeDll>
Expand Down
6 changes: 1 addition & 5 deletions Source/Math/GPUMatrixCUDAKernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,8 @@

// Flattens the 2-D index (i, j) into the linear offset j * ld + i. Every macro argument is
// parenthesized, so compound expressions can be passed safely.
// NOTE(review): presumably i is the row, j the column and ld the leading dimension of a
// column-major matrix - confirm against callers.
#define IDX2C(i, j, ld) (((j) * (ld)) + (i)) // 0 based indexing

// TODO: This condition seems wrong, it should be:
// !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600
// NVIDIA should fix their CUDA 8.0 headers
// On older GPUs, CUDA atomicAdd() only exists for 'float'. This is the 'double' version.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// CUDA atomicAdd() only exists for 'float'. This is the 'double' version.
// TODO: This may need to be guarded by CUDA version; newer devices may support this.
static __inline__ __device__ double atomicAdd(double* address, double val)
{
unsigned long long int* address_as_ull = (unsigned long long int*) address;
Expand Down

0 comments on commit 54b0770

Please sign in to comment.