Skip to content

Commit

Permalink
Add additional samples based on CUDA toolkit version 11.7 and cuFile …
Browse files Browse the repository at this point in the history
…library 1.3.0.44
  • Loading branch information
KiranModukuri committed Jun 15, 2022
1 parent 1962e9d commit c1510ca
Show file tree
Hide file tree
Showing 15 changed files with 893 additions and 25 deletions.
4 changes: 2 additions & 2 deletions gds/samples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ release_samples:
@echo "Release samples Built"


build: cufile_sample_001 cufile_sample_002 cufile_sample_003 cufile_sample_004 cufile_sample_005 cufile_sample_006 cufile_sample_007 cufile_sample_008 cufile_sample_009 cufile_sample_010 cufile_sample_011 cufile_sample_012 cufile_sample_013 cufile_sample_014 cufile_sample_015 cufile_sample_016 cufile_sample_017 cufile_sample_018 cufile_sample_001_static cufile_sample_002_static cufile_sample_003_static cufile_sample_004_static cufile_sample_005_static cufile_sample_006_static cufile_sample_007_static cufile_sample_008_static cufile_sample_009_static cufile_sample_010_static cufile_sample_011_static cufile_sample_012_static cufile_sample_013_static cufile_sample_014_static cufile_sample_015_static cufile_sample_016_static cufile_sample_017_static cufile_sample_018_static
build: cufile_sample_001 cufile_sample_002 cufile_sample_003 cufile_sample_004 cufile_sample_005 cufile_sample_006 cufile_sample_007 cufile_sample_008 cufile_sample_009 cufile_sample_010 cufile_sample_011 cufile_sample_012 cufile_sample_013 cufile_sample_014 cufile_sample_015 cufile_sample_016 cufile_sample_017 cufile_sample_018 cufile_sample_019 cufile_sample_001_static cufile_sample_002_static cufile_sample_003_static cufile_sample_004_static cufile_sample_005_static cufile_sample_006_static cufile_sample_007_static cufile_sample_008_static cufile_sample_009_static cufile_sample_010_static cufile_sample_011_static cufile_sample_012_static cufile_sample_013_static cufile_sample_014_static cufile_sample_015_static cufile_sample_016_static cufile_sample_017_static cufile_sample_018_static cufile_sample_019_static cufile_sample_020_static cufile_sample_020 cufile_sample_021 cufile_sample_022

%: %.cc $(CUFILE_INCLUDE_PATH)/cufile.h
$(CC) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
Expand All @@ -78,6 +78,6 @@ install:
cp cufile_sample_*.cc cufile_sample_*.h Makefile README $(INSTALL_GDSSAMPLES_PREFIX)/

clean:
rm -f cufile_sample_001 cufile_sample_001.o cufile_sample_002 cufile_sample_002.o cufile_sample_003 cufile_sample_003.o cufile_sample_004 cufile_sample_004.o cufile_sample_005 cufile_sample_005.o cufile_sample_006 cufile_sample_006.o cufile_sample_007 cufile_sample_007.o cufile_sample_008 cufile_sample_008.o cufile_sample_009 cufile_sample_009.o cufile_sample_010 cufile_sample_010.o cufile_sample_011 cufile_sample_011.o cufile_sample_012 cufile_sample_012.o cufile_sample_013 cufile_sample_013.o cufile_sample_014 cufile_sample_014.o cufile_sample_015 cufile_sample_015.o cufile_sample_016.o cufile_sample_016 cufile_sample_017.o cufile_sample_017 cufile_sample_018.o cufile_sample_018 cufile_sample_001_static cufile_sample_002_static cufile_sample_003_static cufile_sample_004_static cufile_sample_005_static cufile_sample_006_static cufile_sample_007_static cufile_sample_008_static cufile_sample_009_static cufile_sample_010_static cufile_sample_011_static cufile_sample_012_static cufile_sample_013_static cufile_sample_014_static cufile_sample_015_static cufile_sample_016_static cufile_sample_017_static cufile_sample_018_static
rm -f cufile_sample_001 cufile_sample_001.o cufile_sample_002 cufile_sample_002.o cufile_sample_003 cufile_sample_003.o cufile_sample_004 cufile_sample_004.o cufile_sample_005 cufile_sample_005.o cufile_sample_006 cufile_sample_006.o cufile_sample_007 cufile_sample_007.o cufile_sample_008 cufile_sample_008.o cufile_sample_009 cufile_sample_009.o cufile_sample_010 cufile_sample_010.o cufile_sample_011 cufile_sample_011.o cufile_sample_012 cufile_sample_012.o cufile_sample_013 cufile_sample_013.o cufile_sample_014 cufile_sample_014.o cufile_sample_015 cufile_sample_015.o cufile_sample_016.o cufile_sample_016 cufile_sample_017.o cufile_sample_017 cufile_sample_018.o cufile_sample_019.o cufile_sample_018 cufile_sample_001_static cufile_sample_002_static cufile_sample_003_static cufile_sample_004_static cufile_sample_005_static cufile_sample_006_static cufile_sample_007_static cufile_sample_008_static cufile_sample_009_static cufile_sample_010_static cufile_sample_011_static cufile_sample_012_static cufile_sample_013_static cufile_sample_014_static cufile_sample_015_static cufile_sample_016_static cufile_sample_017_static cufile_sample_018_static cufile_sample_019_static cufile_sample_019 cufile_sample_020_static cufile_sample_020 cufile_sample_021 cufile_sample_022

.PHONY : build install clean
21 changes: 21 additions & 0 deletions gds/samples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,24 @@ buffer offsets of a memory allocated using single allocation and single buffer r
``` bash
./cufile_sample_018 <file-path>
```

**Note**: Following samples need cuFile library version 11.6 and above
**cufile_sample_019**: This sample shows the usage of cuFile Batch API for writes.
``` bash
./cufile_sample_019 <file-path> <gpuid> <num batch entries>
```

**cufile_sample_020**: This sample shows the usage of cuFile Batch API for reads.
``` bash
./cufile_sample_020 <file-path> <gpuid> <num batch entries>
```

**cufile_sample_021**: This sample shows the usage of cuFile Batch API to cancel I/O after submitting a batch read.
``` bash
./cufile_sample_021 <file-path> <gpuid> <num batch entries>
```

**cufile_sample_022**: This sample shows the usage of cuFile Batch API to perform cuFileBatchIOGetStatus after submitting a batch read.
``` bash
./cufile_sample_022 <file-path> <gpuid> <num batch entries>
```
1 change: 1 addition & 0 deletions gds/samples/cufile_sample_001.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ int main(int argc, char *argv[]) {
check_cudaruntimecall(cudaMalloc(&devPtr, size));
// filler
check_cudaruntimecall(cudaMemset((void*)(devPtr), 0xab, size));
check_cudaruntimecall(cudaStreamSynchronize(0));

std::cout << "registering device memory of size :" << size << std::endl;
// registers device memory
Expand Down
2 changes: 1 addition & 1 deletion gds/samples/cufile_sample_002.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) {

// filler for device memory
check_cudaruntimecall(cudaMemset(devPtr, 0xab, size));

check_cudaruntimecall(cudaStreamSynchronize(0));
check_cudaruntimecall(cudaGetDevice(&idx));

std::cout << "writing from gpuid: " << idx << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion gds/samples/cufile_sample_003.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) {
check_cudaruntimecall(cudaMalloc(&devPtr, size));
// special case for holes
check_cudaruntimecall(cudaMemset(devPtr, 0, size));

check_cudaruntimecall(cudaStreamSynchronize(0));
std::cout << "reading file to device memory :" << TEST_READWRITEFILE
<< std::endl;

Expand Down
1 change: 1 addition & 0 deletions gds/samples/cufile_sample_004.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ int main(int argc, char *argv[]) {
check_cudadrivercall(cuCtxCreate(&cuCtx, 0, cudev));
check_cudadrivercall(cuMemAlloc(&cudevPtr, size));
check_cudadrivercall(cuMemsetD8(cudevPtr, 0x0, size));
check_cudadrivercall(cuStreamSynchronize(0));

std::cout << "reading file to device memory :"
<< TEST_READWRITEFILE << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion gds/samples/cufile_sample_006.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int main(int argc, char *argv[]) {

check_cudaruntimecall(cudaMalloc(&devPtr, size));
check_cudaruntimecall(cudaMemset(devPtr, 0x00, size));

check_cudaruntimecall(cudaStreamSynchronize(0));
std::cout << "reading file sequentially :" << TESTFILE
<< " chunk size : " << CHUNK_SIZE << std::endl;
do {
Expand Down
2 changes: 1 addition & 1 deletion gds/samples/cufile_sample_014.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) {
size = std::min(size, MAX_BUFFER_SIZE);
check_cudaruntimecall(cudaMalloc(&devPtr, size));
check_cudaruntimecall(cudaMemset((void*)(devPtr), 0x00, size));

check_cudaruntimecall(cudaStreamSynchronize(0));
std::cout << "registering device memory of size :" << size << std::endl;
status = cuFileBufRegister(devPtr, size, 0);
if (status.err != CU_FILE_SUCCESS) {
Expand Down
1 change: 1 addition & 0 deletions gds/samples/cufile_sample_015.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ int main(int argc, char *argv[]) {
assert((devPtr = alloc_memory(size, mem_type)) != nullptr);
// special case for holes
assert(cudaMemset(devPtr, 0, size) == cudaSuccess);
check_cudaruntimecall(cudaStreamSynchronize(0));

std::cout << "reading file to device memory :" << TEST_READWRITEFILE
<< std::endl;
Expand Down
58 changes: 44 additions & 14 deletions gds/samples/cufile_sample_018.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <assert.h>

// CUDA includes
#include <cuda.h>
Expand All @@ -36,6 +35,7 @@

#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
#define MAX_RETRY 3

//Macro for checking cuda errors following a cuda launch or api call
#define cudaCheckError() { \
Expand All @@ -59,6 +59,7 @@ static void *read_thread_fn(void *data)
{
int ret;
thread_data_t *t = (thread_data_t *)data;
int cnt;

cudaSetDevice(0);
cudaCheckError();
Expand All @@ -72,18 +73,31 @@ static void *read_thread_fn(void *data)
fl.l_len = ALIGN_UP(t->size, PAGE_SIZE);

// Acquire lock at 4K boundary
if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
printf("Failed to acquire read lock from offset %ld size %ld errno %d\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len, errno);
exit(1);
}
cnt = 0;
while (1) {
cnt++;
if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
printf("Failed to acquire read lock from offset %ld size %ld errno %d\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len, errno);
if (cnt == MAX_RETRY) {
exit(1);
} else {
printf("Retrying fcntl for read..\n");
}
} else {
break;
}
}

printf("Read lock acquired from offset %ld size %ld. Submit read at offset %ld size %ld\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len,
(unsigned long) t->offset, (unsigned long) t->size);

ret = cuFileRead(t->cfr_handle, t->devPtr, t->size, t->offset, 0);
assert(ret > 0);
if (ret < 0) {
perror("cuFileRead Failed");
exit(1);
}

fl.l_type = F_UNLCK; /* set to unlock same region */
if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
Expand All @@ -101,6 +115,7 @@ static void *write_thread_fn(void *data)
{
int ret;
thread_data_t *t = (thread_data_t *)data;
int cnt;

/*
* We need to set the CUDA device; threads will not inherit main thread's
Expand All @@ -120,19 +135,31 @@ static void *write_thread_fn(void *data)
// Acquire lock at 4K boundary
fl.l_start = ALIGN_DOWN(t->offset, PAGE_SIZE);
fl.l_len = ALIGN_UP(t->size, PAGE_SIZE);
cnt = 0;
while (1) {
cnt++;
if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
printf("Failed to acquire write lock from offset %ld size %ld errno %d\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len, errno);
if (cnt == MAX_RETRY) {
exit(1);
} else {
printf("Retrying fcntl for write..\n");
}
} else {
break;
}
}

if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
printf("Failed to acquire write lock from offset %ld size %ld errno %d\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len, errno);
exit(1);
}

printf("Write lock acquired from offset %ld size %ld. Submit write at offset %ld size %ld\n",
(unsigned long) fl.l_start, (unsigned long) fl.l_len,
(unsigned long) t->offset, (unsigned long) t->size);

ret = cuFileWrite(t->cfr_handle, t->devPtr, t->size, t->offset, 0);
assert(ret > 0);
if (ret < 0) {
perror("cuFileWrite Failed");
exit(1);
}

fl.l_type = F_UNLCK; /* set to unlock same region */
if (fcntl(t->fd, F_SETLKW, &fl) == -1) {
Expand Down Expand Up @@ -194,6 +221,9 @@ int main(int argc, char **argv) {
cudaMemset(devPtr, 0xab, KB(4));
cudaCheckError();

cudaStreamSynchronize(0);
cudaCheckError();

// Thread 0 will write to file from offset 10 - write size 100 bytes
// This is an unaligned write as offset is not 4K aligned. GDS will
// convert this write to Read-Modify-Write
Expand Down
Loading

0 comments on commit c1510ca

Please sign in to comment.