Skip to content
This repository has been archived by the owner on Sep 2, 2023. It is now read-only.

Commit

Permalink
[ocl/streamcluster] measure the time
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhc committed Sep 9, 2017
1 parent 95bf766 commit b80bdaf
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 69 deletions.
1 change: 1 addition & 0 deletions opencl/streamcluster/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.txt
113 changes: 53 additions & 60 deletions opencl/streamcluster/CLHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@ double KE; //: the kernel execution time
double KC; //: the kernel compilation time
#endif

//Primitives for timing (only compiled when TIMING is defined).
//Host-side phases are bracketed with gettimeofday() and reduced with tvsub();
//the resulting durations are stored in milliseconds
//(tv_sec * 1000 + tv_usec / 1000).
//NOTE(review): these are plain, non-static definitions inside a header, so the
//header presumably has to be included by exactly one translation unit -- confirm.
#ifdef TIMING
//presumably declares tvsub() and probe_event_time() -- TODO confirm
#include "timing.h"

//Scratch value that receives the difference computed by tvsub().
struct timeval tv;
//tv_total_start is taken in main() right before _clInit().
//NOTE(review): no use of tv_total_end is visible; total_time is derived from
//tv_close_end instead -- confirm whether tv_total_end is still needed.
struct timeval tv_total_start, tv_total_end;
//Taken in main() right after _clInit() returns; init_time = tv_init_end - tv_total_start.
struct timeval tv_init_end;
//NOTE(review): the h2d/d2h/kernel timevals below do not appear to be used;
//those phases are measured through probe_event_time() on OpenCL events -- confirm.
struct timeval tv_h2d_start, tv_h2d_end;
struct timeval tv_d2h_start, tv_d2h_end;
struct timeval tv_kernel_start, tv_kernel_end;
//Bracket _clMallocHost() and allocDevMem() in pgain().
struct timeval tv_mem_alloc_start, tv_mem_alloc_end;
//Bracket freeDevMem() + _clRelease() in main().
struct timeval tv_close_start, tv_close_end;
//Accumulated results that main() prints at exit.
//  init_time, close_time, total_time : assigned once in main(), in milliseconds.
//  mem_alloc_time                    : summed (+=) over every allocation bracket, in milliseconds.
//  h2d_time, kernel_time, d2h_time   : summed (+=) from probe_event_time() return values;
//                                      unit presumably milliseconds as well -- TODO confirm.
float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0,
d2h_time = 0, close_time = 0, total_time = 0;
#endif

//#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
#define WORK_DIM 2 //work-items dimensions
/*------------------------------------------------------------
Expand Down Expand Up @@ -215,14 +231,14 @@ string FileToString(const string fileName){
@return:
@date: 24/03/2011
------------------------------------------------------------*/
char device_type[3];
int device_id = 0;
int platform_id = 0;
void _clCmdParams(int argc, char* argv[]){
for (int i = 0; i < argc; ++i){
switch (argv[i][1]){
case 't': //--t stands for device type
case 'p': //--p stands for platform
if (++i < argc){
sscanf(argv[i], "%s", device_type);
sscanf(argv[i], "%d", &platform_id);
}
else{
std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
Expand Down Expand Up @@ -260,7 +276,7 @@ void _clCmdParams(int argc, char* argv[]){
get the number of devices and devices have no relationship with context
@date: 24/03/2011
------------------------------------------------------------*/
void _clInit(string device_type, int device_id)throw(string){
void _clInit(int platform_id, int device_id)throw(string){

#ifdef PROFILE_
TE = 0;
Expand Down Expand Up @@ -311,7 +327,7 @@ void _clInit(string device_type, int device_id)throw(string){
throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));

// Select the target platform. Default: first platform
targetPlatform = allPlatforms[0];
targetPlatform = allPlatforms[platform_id];
for (int i = 0; i < numPlatforms; i++)
{
char pbuff[128];
Expand All @@ -333,36 +349,14 @@ void _clInit(string device_type, int device_id)throw(string){
//-----------------------------------------------
//--cambine-2: detect OpenCL devices
// First, get the size of device list
if(device_type.compare("")!=0){
if(device_type.compare("cpu")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
}
}
if(device_type.compare("gpu")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
}
}
if(device_type.compare("acc")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
}
}
}
else{
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
}
}

oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
}

if (deviceListSize == 0)
throw(string("InitCL()::Error: No devices found."));

#ifdef DEV_INFO
std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
#endif
Expand All @@ -375,31 +369,9 @@ void _clInit(string device_type, int device_id)throw(string){
throw(string("InitCL()::Error: Could not allocate memory."));

// Next, get the device list data
if(device_type.compare("")!=0){
if(device_type.compare("cpu")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
}
}
if(device_type.compare("gpu")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
}
}
if(device_type.compare("acc")==0){
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
}
}
}
else{
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
}
oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
if(oclHandles.cl_status!=CL_SUCCESS){
throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
}
if(device_id!=0){
if(device_id>(deviceListSize-1))
Expand Down Expand Up @@ -435,10 +407,17 @@ void _clInit(string device_type, int device_id)throw(string){

//-----------------------------------------------
//--cambine-4: Create an OpenCL command queue
#ifdef TIMING
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED],
CL_QUEUE_PROFILING_ENABLE,
&resultCL);
#else
oclHandles.queue = clCreateCommandQueue(oclHandles.context,
oclHandles.devices[DEVICE_ID_INUSED],
0,
&resultCL);
#endif

if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
Expand Down Expand Up @@ -855,7 +834,8 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
#ifdef PROFILE_
double t1 = gettime();
#endif
oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, NULL);
cl_event event;
oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, &event);
#ifdef ERRMSG
if(oclHandles.cl_status != CL_SUCCESS){
oclHandles.error_str = "excpetion in _clMemcpyH2D -> ";
Expand Down Expand Up @@ -892,6 +872,9 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
double t2 = gettime();
H2D += t2 - t1;
#endif
#ifdef TIMING
h2d_time += probe_event_time(event, oclHandles.queue);
#endif
}

/*------------------------------------------------------------
Expand All @@ -907,7 +890,8 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
#ifdef PROFILE_
double t1 = gettime();
#endif
oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,0);
cl_event event;
oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,&event);
#ifdef ERRMSG
if(oclHandles.cl_status != CL_SUCCESS){
oclHandles.error_str = "excpetion in _clMemCpyD2H -> ";
Expand Down Expand Up @@ -944,6 +928,9 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
double t2 = gettime();
D2H += t2 - t1;
#endif
#ifdef TIMING
d2h_time += probe_event_time(event, oclHandles.queue);
#endif
}
/*------------------------------------------------------------
@function: transfer data from device to device
Expand Down Expand Up @@ -1227,6 +1214,9 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
double t2 = gettime();
KE += t2 - t1;
#endif
#ifdef TIMING
kernel_time += probe_event_time(e[0], oclHandles.queue);
#endif
}

/*------------------------------------------------------------
Expand Down Expand Up @@ -1346,6 +1336,9 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
double t2 = gettime();
H2D += t2 - t1;
#endif
#ifdef TIMING
h2d_time += probe_event_time(e[0], oclHandles.queue);
#endif
}
/*------------------------------------------------------------
@function: entry of invoke the kernel function using 2d working items
Expand Down
8 changes: 5 additions & 3 deletions opencl/streamcluster/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ CC = g++


FLAGS = -O3
SRC = streamcluster.cpp
EXE = streamcluster
SRC = streamcluster.cpp ../util/timing.c
EXE = streamcluster.out

release:$(SRC)
$(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) $(FLAGS)
$(CC) $(SRC) -o $(EXE) \
-I$(OPENCL_INC) -L$(OPENCL_LIB) $(FLAGS) -lOpenCL \
-I../util -DTIMING

errmsg:$(SRC)
$(CC) -lOpenCL $(SRC) -o $(EXE) -I$(OPENCL_INC) -L$(OPENCL_LIB) -D ERRMSG $(FLAGS)
Expand Down
2 changes: 1 addition & 1 deletion opencl/streamcluster/run
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
for i in `seq 1 1`; do
#echo `expr $j \* 10240` `expr $j \* 1024`
#./sc_gpu 10 20 16 `expr $j \* 10240` `expr $j \* 1024` 1000 none output.txt 1
./streamcluster 10 20 256 65536 65536 1000 none output.txt 1 -t gpu -d 0
./streamcluster.out 10 20 256 65536 65536 1000 none output.txt 1 -p 0 -d 0
done
43 changes: 39 additions & 4 deletions opencl/streamcluster/streamcluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ void outcenterIDs( Points* centers, long* centerIDs, char* outfile ) {

for( int i = 0; i < centers->num; i++ ) {
if( is_a_median[i] ) {
fprintf(fp, "%u\n", centerIDs[i]);
fprintf(fp, "%ld\n", centerIDs[i]);
fprintf(fp, "%lf\n", centers->p[i].weight);
for( int k = 0; k < centers->dim; k++ ) {
fprintf(fp, "%lf ", centers->p[i].coord[k]);
Expand Down Expand Up @@ -802,7 +802,7 @@ void streamCluster( PStream* stream,
while(1) {

size_t numRead = stream->read(block, dim, chunksize );
fprintf(stderr,"read %d points\n",numRead);
fprintf(stderr,"read %lu points\n",numRead);

if( stream->ferror() || numRead < (unsigned int)chunksize && !stream->feof() ) {
fprintf(stderr, "error reading data!\n");
Expand Down Expand Up @@ -876,7 +876,7 @@ int main(int argc, char **argv)
#endif

if (argc<11) {
fprintf(stderr,"usage: %s k1 k2 d n chunksize clustersize infile outfile nproc\n",
fprintf(stderr,"usage: %s k1 k2 d n chunksize clustersize infile outfile nproc [-p platform] [-d device]\n",
argv[0]);
fprintf(stderr," k1: Min. number of centers allowed\n");
fprintf(stderr," k2: Max. number of centers allowed\n");
Expand All @@ -887,6 +887,8 @@ int main(int argc, char **argv)
fprintf(stderr," infile: Input file (if n<=0)\n");
fprintf(stderr," outfile: Output file\n");
fprintf(stderr," nproc: Number of threads to use\n");
fprintf(stderr," platform: Platform id\n");
fprintf(stderr," device: Device id\n");
fprintf(stderr,"\n");
fprintf(stderr, "if n > 0, points will be randomly generated instead of reading from infile.\n");
exit(1);
Expand All @@ -901,13 +903,23 @@ int main(int argc, char **argv)
strcpy(outfilename, argv[8]);
nproc = atoi(argv[9]);
_clCmdParams(argc, argv);

#ifdef TIMING
gettimeofday(&tv_total_start, NULL);
#endif
try{
_clInit(device_type, device_id);
_clInit(platform_id, device_id);
}
catch(std::string msg){
std::cout<<"exception caught in main function->"<<msg<<std::endl;
return -1;
}
#ifdef TIMING
gettimeofday(&tv_init_end, NULL);
tvsub(&tv_init_end, &tv_total_start, &tv);
init_time = tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0;
#endif

srand48(SEED);
PStream* stream;
if( n > 0 ) {
Expand Down Expand Up @@ -937,11 +949,21 @@ int main(int argc, char **argv)
#ifdef PROFILE_TMP
gpu_free = gettime();
#endif
#ifdef TIMING
gettimeofday(&tv_close_start, NULL);
#endif

freeDevMem();
#ifdef PROFILE_TMP
gpu_free = gettime() - gpu_free;
#endif
_clRelease();
#ifdef TIMING
gettimeofday(&tv_close_end, NULL);
tvsub(&tv_close_end, &tv_close_start, &tv);
close_time = tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0;
#endif

#ifdef ENABLE_PARSEC_HOOKS
__parsec_roi_end();
#endif
Expand All @@ -953,6 +975,19 @@ int main(int argc, char **argv)

delete stream;

#ifdef TIMING
tvsub(&tv_close_end, &tv_total_start, &tv);
total_time = tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0;

printf("Init: %f\n", init_time);
printf("MemAlloc: %f\n", mem_alloc_time);
printf("HtoD: %f\n", h2d_time);
printf("Exec: %f\n", kernel_time);
printf("DtoH: %f\n", d2h_time);
printf("Close: %f\n", close_time);
printf("Total: %f\n", total_time);
#endif

#ifdef PROFILE_TMP
printf("time pgain = %lf\n", time_gain);
printf("time pgain_dist = %lf\n", time_gain_dist);
Expand Down
19 changes: 18 additions & 1 deletion opencl/streamcluster/streamcluster_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,16 @@ float pgain( long x, Points *points, float z, long int *numcenters, int kmax, bo
#endif
coord_h = (float*) malloc( num * dim * sizeof(float)); // coordinates (host)
gl_lower = (float*) malloc( kmax * sizeof(float) );
#ifdef TIMING
gettimeofday(&tv_mem_alloc_start, NULL);
#endif
work_mem_h = (float*)_clMallocHost(kmax*num*sizeof(float));
#ifdef TIMING
gettimeofday(&tv_mem_alloc_end, NULL);
tvsub(&tv_mem_alloc_end, &tv_mem_alloc_start, &tv);
mem_alloc_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0;
#endif

p_h = (Point_Struct*)malloc(num*sizeof(Point_Struct)); //by cambine: not compatible with original Point

// prepare mapping for point coordinates
Expand All @@ -122,8 +131,16 @@ float pgain( long x, Points *points, float z, long int *numcenters, int kmax, bo
double t4 = gettime();
*serial += t4 - t3;
#endif
#ifdef TIMING
gettimeofday(&tv_mem_alloc_start, NULL);
#endif

allocDevMem(num, dim, kmax);
#ifdef TIMING
gettimeofday(&tv_mem_alloc_end, NULL);
tvsub(&tv_mem_alloc_end, &tv_mem_alloc_start, &tv);
mem_alloc_time += tv.tv_sec * 1000.0 + (float) tv.tv_usec / 1000.0;
#endif
#ifdef PROFILE_TMP
double t5 = gettime();
*gpu_malloc += t5 - t4;
Expand Down Expand Up @@ -225,7 +242,7 @@ float pgain( long x, Points *points, float z, long int *numcenters, int kmax, bo
}
}
gl_cost += work_mem_h[i*(K+1)+K];
}
}

/* if opening a center at x saves cost (i.e. cost is negative) do so
otherwise, do nothing */
Expand Down

0 comments on commit b80bdaf

Please sign in to comment.