Skip to content

Commit

Permalink
Fix separate compilation -dc (NVIDIA#433)
Browse files Browse the repository at this point in the history
* Fix separate compilation `-dc`

- When CUTLASS is included in multiple compilation units
  compiled with `-dc`, the OOB_NAN_F16x8 device constant is
  instantiated multiple times, causing a
  "Multiple definition of '_ZN7cutlass4arch13OOB_NAN_F16x8E'" error.
  This PR makes the variable a function-local constant, as it is not
  modified at runtime.

Signed-off-by: Janusz Lisiecki <[email protected]>

* Fix

Signed-off-by: Janusz Lisiecki <[email protected]>

* Test GH

Signed-off-by: Janusz Lisiecki <[email protected]>

* Revert test GH

Signed-off-by: Janusz Lisiecki <[email protected]>
  • Loading branch information
JanuszL authored Mar 22, 2022
1 parent 3ab1eac commit 8f1fe7a
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions include/cutlass/arch/memory_sm80.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ struct cp_async<SizeInBytes, CacheOperation::Always> {
CUTLASS_DEVICE
cp_async(void *smem_ptr, void const *global_ptr, bool pred_guard = true) {
#if CUDA_CP_ASYNC_ACTIVATED

// Make sure the size is supported.
static_assert((SizeInBytes == 4 || SizeInBytes == 8 || SizeInBytes == 16),
"Size is not supported");

unsigned smem_int_ptr = cutlass_get_smem_pointer(smem_ptr);

asm volatile(
Expand Down Expand Up @@ -135,7 +135,7 @@ struct cp_async_zfill<SizeInBytes, CacheOperation::Always> {
// Make sure the size is supported.
static_assert((SizeInBytes == 4 || SizeInBytes == 8 || SizeInBytes == 16),
"Size is not supported");

unsigned smem_int_ptr = cutlass_get_smem_pointer(smem_ptr);
int src_in_bytes = (pred_guard ? SizeInBytes : 0);

Expand All @@ -162,9 +162,6 @@ struct cp_async_zfill<SizeInBytes, CacheOperation::Always> {
}
};

__device__ __constant__ uint4 OOB_NAN_F16x8 = {0x7eff7eff, 0x7eff7eff,
0x7eff7eff, 0x7eff7eff};

/// Partial specialization
template <>
struct cp_async_nan<16, CacheOperation::Always> {
Expand All @@ -174,7 +171,10 @@ struct cp_async_nan<16, CacheOperation::Always> {
CUTLASS_DEVICE
cp_async_nan(void *smem_ptr, void const *global_ptr, bool pred_guard) {
#if CUDA_CP_ASYNC_ACTIVATED


static __constant__ uint4 OOB_NAN_F16x8 = {0x7eff7eff, 0x7eff7eff,
0x7eff7eff, 0x7eff7eff};

unsigned smem_int_ptr = cutlass_get_smem_pointer(smem_ptr);

asm volatile(
Expand Down Expand Up @@ -216,7 +216,7 @@ struct cp_async<SizeInBytes, CacheOperation::Global> {
CUTLASS_DEVICE
cp_async(void *smem_ptr, void const *global_ptr, bool pred_guard = true) {
#if CUDA_CP_ASYNC_ACTIVATED

static_assert(SizeInBytes == 16,
"cp.async only supports CacheOperation::Global when access size is 16B.");

Expand Down

0 comments on commit 8f1fe7a

Please sign in to comment.