Skip to content

Commit

Permalink
Update Exercise 4
Browse files Browse the repository at this point in the history
  • Loading branch information
tomdeakin committed Nov 24, 2014
1 parent 62bdcc7 commit 5b6e4ff
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 99 deletions.
4 changes: 2 additions & 2 deletions Exercises/Exercise04/C/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ endif

CCFLAGS += -D DEVICE=$(DEVICE)

vadd: vadd_c.c $(COMMON_DIR)/wtime.c $(COMMON_DIR)/err_code.c $(COMMON_DIR)/device_info.c
$(CC) $^ $(CCFLAGS) $(LIBS) -o $@
vadd: vadd_c.c $(COMMON_DIR)/wtime.c $(COMMON_DIR)/device_info.c
$(CC) $^ $(CCFLAGS) $(LIBS) -I $(COMMON_DIR) -o $@


clean:
Expand Down
152 changes: 63 additions & 89 deletions Exercises/Exercise04/C/vadd_c.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
//------------------------------------------------------------------------------
//
// Name: vadd_c.c
//
//
// Purpose: Elementwise addition of two vectors (c = a + b)
//
// HISTORY: Written by Tim Mattson, December 2009
// Updated by Tom Deakin and Simon McIntosh-Smith, October 2012
// Updated by Tom Deakin, July 2013
//
// Updated by Tom Deakin, October 2014
//
//------------------------------------------------------------------------------


Expand All @@ -21,7 +22,9 @@
#include <CL/cl.h>
#endif

//pick up device type from compiler command line or from
#include "err_code.h"

// Pick up the device type from the compiler command line (-D DEVICE=...),
// or fall back to the default device type
#ifndef DEVICE
#define DEVICE CL_DEVICE_TYPE_DEFAULT
Expand All @@ -30,7 +33,7 @@

extern double wtime(); // returns time since some fixed past point (wtime.c)
extern int output_device_info(cl_device_id );
char* err_code (cl_int);


//------------------------------------------------------------------------------

Expand All @@ -39,15 +42,15 @@ char* err_code (cl_int);

//------------------------------------------------------------------------------
//
// kernel: vadd
// kernel: vadd
//
// Purpose: Compute the elementwise sum c = a+b
//
//
// input: a and b float vectors of length count
//
// output: c float vector of length count holding the sum a + b
//

const char *KernelSource = "\n" \
"__kernel void vadd( \n" \
" __global float* a, \n" \
Expand All @@ -67,51 +70,50 @@ const char *KernelSource = "\n" \
int main(int argc, char** argv)
{
int err; // error code returned from OpenCL calls
float h_a[LENGTH]; // a vector
float h_b[LENGTH]; // b vector
float h_c[LENGTH]; // c vector (a+b) returned from the compute device
unsigned int correct; // number of correct results

size_t global; // global domain size
float* h_a = (float*) calloc(LENGTH, sizeof(float)); // a vector
float* h_b = (float*) calloc(LENGTH, sizeof(float)); // b vector
float* h_c = (float*) calloc(LENGTH, sizeof(float)); // c vector (a+b) returned from the compute device

unsigned int correct; // number of correct results

size_t global; // global domain size

cl_device_id device_id; // compute device id
cl_device_id device_id; // compute device id
cl_context context; // compute context
cl_command_queue commands; // compute command queue
cl_program program; // compute program
cl_kernel ko_vadd; // compute kernel

cl_mem d_a; // device memory used for the input a vector
cl_mem d_b; // device memory used for the input b vector
cl_mem d_c; // device memory used for the output c vector

// Fill vectors a and b with random float values
int i = 0;
int count = LENGTH;
for(i = 0; i < count; i++){
h_a[i] = rand() / (float)RAND_MAX;
h_b[i] = rand() / (float)RAND_MAX;
}

// Set up platform and GPU device

cl_uint numPlatforms;

// Find number of platforms
err = clGetPlatformIDs(0, NULL, &numPlatforms);
if (err != CL_SUCCESS || numPlatforms <= 0)
checkError(err, "Finding platforms");
if (numPlatforms == 0)
{
printf("Error: Failed to find a platform!\n%s\n",err_code(err));
printf("Found 0 platforms!\n");
return EXIT_FAILURE;
}

// Get all platforms
cl_platform_id Platform[numPlatforms];
err = clGetPlatformIDs(numPlatforms, Platform, NULL);
if (err != CL_SUCCESS || numPlatforms <= 0)
{
printf("Error: Failed to get the platform!\n%s\n",err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Getting platforms");

// Secure a GPU
for (i = 0; i < numPlatforms; i++)
Expand All @@ -124,38 +126,24 @@ int main(int argc, char** argv)
}

if (device_id == NULL)
{
printf("Error: Failed to create a device group!\n%s\n",err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Finding a device");

err = output_device_info(device_id);

// Create a compute context
checkError(err, "Printing device output");

// Create a compute context
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context)
{
printf("Error: Failed to create a compute context!\n%s\n", err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Creating context");

// Create a command queue
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands)
{
printf("Error: Failed to create a command commands!\n%s\n", err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Creating command queue");

// Create the compute program from the source buffer
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
if (!program)
{
printf("Error: Failed to create compute program!\n%s\n", err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Creating program");

// Build the program
// Build the program
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
Expand All @@ -168,64 +156,46 @@ int main(int argc, char** argv)
return EXIT_FAILURE;
}

// Create the compute kernel from the program
// Create the compute kernel from the program
ko_vadd = clCreateKernel(program, "vadd", &err);
if (!ko_vadd || err != CL_SUCCESS)
{
printf("Error: Failed to create compute kernel!\n%s\n", err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Creating kernel");

// Create the input (a, b) and output (c) arrays in device memory
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
if (!d_a || !d_b || !d_c)
{
printf("Error: Failed to allocate device memory!\n");
exit(1);
}
// Write a and b vectors into compute device memory
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, &err);
checkError(err, "Creating buffer d_a");

d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, &err);
checkError(err, "Creating buffer d_b");

d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, &err);
checkError(err, "Creating buffer d_c");

// Write a and b vectors into compute device memory
err = clEnqueueWriteBuffer(commands, d_a, CL_TRUE, 0, sizeof(float) * count, h_a, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to write h_a to source array!\n%s\n", err_code(err));
exit(1);
}
checkError(err, "Copying h_a to device at d_a");

err = clEnqueueWriteBuffer(commands, d_b, CL_TRUE, 0, sizeof(float) * count, h_b, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error: Failed to write h_b to source array!\n%s\n", err_code(err));
exit(1);
}

checkError(err, "Copying h_b to device at d_b");

// Set the arguments to our compute kernel
err = clSetKernelArg(ko_vadd, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(ko_vadd, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(ko_vadd, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(ko_vadd, 3, sizeof(unsigned int), &count);
if (err != CL_SUCCESS)
{
printf("Error: Failed to set kernel arguments!\n");
exit(1);
}
checkError(err, "Setting kernel arguments");

double rtime = wtime();

// Execute the kernel over the entire range of our 1d input data set
// letting the OpenCL runtime choose the work-group size
global = count;
err = clEnqueueNDRangeKernel(commands, ko_vadd, 1, NULL, &global, NULL, 0, NULL, NULL);
if (err)
{
printf("Error: Failed to execute kernel!\n%s\n", err_code(err));
return EXIT_FAILURE;
}
checkError(err, "Enqueueing kernel");

// Wait for the commands to complete before stopping the timer
clFinish(commands);
err = clFinish(commands);
checkError(err, "Waiting for kernel to finish");

rtime = wtime() - rtime;
printf("\nThe kernel ran in %lf seconds\n",rtime);

Expand All @@ -236,11 +206,11 @@ int main(int argc, char** argv)
printf("Error: Failed to read output array!\n%s\n", err_code(err));
exit(1);
}

// Test the results
correct = 0;
float tmp;

for(i = 0; i < count; i++)
{
tmp = h_a[i] + h_b[i]; // assign element i of a+b to tmp
Expand All @@ -251,10 +221,10 @@ int main(int argc, char** argv)
printf(" tmp %f h_a %f h_b %f h_c %f \n",tmp, h_a[i], h_b[i], h_c[i]);
}
}
// summarize results

// summarise results
printf("C = A+B: %d out of %d results were correct.\n", correct, count);

// cleanup then shutdown
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
Expand All @@ -264,6 +234,10 @@ int main(int argc, char** argv)
clReleaseCommandQueue(commands);
clReleaseContext(context);

free(h_a);
free(h_b);
free(h_c);

return 0;
}

4 changes: 2 additions & 2 deletions Exercises/Exercise04/Cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ endif

CPP_COMMON = ../../Cpp_common

CCFLAGS=-std=c++11
CCFLAGS=

INC = -I $(CPP_COMMON)

Expand All @@ -29,7 +29,7 @@ endif

CCFLAGS += -D DEVICE=$(DEVICE)

vadd: vadd.cpp $(CPP_COMMON)/err_code.c
vadd: vadd.cpp
$(CPPC) $^ $(INC) $(CCFLAGS) $(LIBS) -o $@


Expand Down
12 changes: 6 additions & 6 deletions Exercises/Exercise04/Cpp/vadd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#define DEVICE CL_DEVICE_TYPE_DEFAULT
#endif

char* err_code(cl_int);
#include <err_code.h>

//------------------------------------------------------------------------------

Expand All @@ -43,7 +43,7 @@ int main(void)
{
std::vector<float> h_a(LENGTH); // a vector
std::vector<float> h_b(LENGTH); // b vector
std::vector<float> h_c (LENGTH, 0xdeadbeef); // c = a + b, from compute device
std::vector<float> h_c(LENGTH, 0xdeadbeef); // c = a + b, from compute device

cl::Buffer d_a; // device memory used for the input a vector
cl::Buffer d_b; // device memory used for the input b vector
Expand Down Expand Up @@ -71,10 +71,10 @@ int main(void)

// Create the kernel functor

auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd");
cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int> vadd(program, "vadd");

d_a = cl::Buffer(context, begin(h_a), end(h_a), true);
d_b = cl::Buffer(context, begin(h_b), end(h_b), true);
d_a = cl::Buffer(context, h_a.begin(), h_a.end(), true);
d_b = cl::Buffer(context, h_b.begin(), h_b.end(), true);

d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

Expand All @@ -94,7 +94,7 @@ int main(void)
double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
printf("\nThe kernels ran in %lf seconds\n", rtime);

cl::copy(queue, d_c, begin(h_c), end(h_c));
cl::copy(queue, d_c, h_c.begin(), h_c.end());

// Test the results
int correct = 0;
Expand Down

0 comments on commit 5b6e4ff

Please sign in to comment.