Commit
Merge branch 'gert' into try-merge-gert
Conflicts:

	Makefile.in
	src/python/gpuarray.py
inducer committed Aug 21, 2008
2 parents 7a87c08 + d9ec83a commit b70ee8d
Showing 16 changed files with 1,828 additions and 288 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
.pydevproject
.project
.settings
*~
.*.sw[po]
*.dat
5 changes: 5 additions & 0 deletions doc/source/array.rst
@@ -62,3 +62,8 @@ The :class:`GPUArray` Array Class

Same as :func:`empty`, but the :class:`GPUArray` is zero-initialized before
being returned.

We also provide a math module, :mod:`pycuda.cumath`, which emulates the standard Python
:mod:`math` module and executes all of its functions on the GPU instead of the CPU,
provided the argument is of a supported type. Right now the supported types are
:class:`GPUArray` and classes which inherit from it, such as SimpleArray.
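
A minimal usage sketch (assuming :func:`sin` is among the emulated functions, and
following the driver-initialization pattern from the tutorial)::

    import numpy
    import pycuda.driver as cuda
    import pycuda.gpuarray as gpuarray
    import pycuda.cumath as cumath

    # initialize the driver and create a context on the first device
    cuda.init()
    assert cuda.Device.count() >= 1
    ctx = cuda.Device(0).make_context()

    # move host data to the GPU, evaluate sin there, and fetch the result
    a_gpu = gpuarray.to_gpu(numpy.random.randn(100).astype(numpy.float32))
    print cumath.sin(a_gpu).get()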
18 changes: 17 additions & 1 deletion doc/source/tutorial.rst
@@ -97,15 +97,31 @@ Using a :class:`pycuda.gpuarray.GPUArray`, the same effect can be
achieved with much less writing::

import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda

cuda.init()
assert cuda.Device.count() >= 1

dev = cuda.Device(0)
ctx = dev.make_context()
a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32))
a_doubled = (2*a_gpu).get()
print a_doubled
print a_gpu
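
As a quick sanity check, the result fetched from the GPU can be compared against the
same computation done with numpy on the host (a minimal sketch, assuming numpy is
imported as in the surrounding tutorial)::

    # the doubled array brought back from the GPU should match plain
    # numpy arithmetic on a host-side copy of the original data
    a_cpu = a_gpu.get()
    assert numpy.allclose(a_doubled, 2*a_cpu)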


Where to Go from Here
---------------------

Once you feel sufficiently familiar with the basics, feel free to dig into the
:ref:`reference-doc`. Also check out PyCuda's test suite at
-:file:`test/test_driver.py`. It contains examples (and tests!) of many more
+:file:`test`. It contains examples (and tests!) of many more
advanced techniques.

More examples
-------------

If you want to see more examples, you can find them in the :file:`examples` folder.
This folder also contains several benchmarks that show the difference between GPU-
and CPU-based calculations.
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions examples/plot_random_data.py
@@ -0,0 +1,23 @@
#simple module to show the plotting of random data

import pycuda.gpuarray as cuda
from matplotlib.pylab import *

size = 1000

#random data generated on gpu
a = cuda.array(size).randn()


subplot(211)
plot(a)
grid(True)
ylabel('plot - gpu')

subplot(212)
hist(a, 100)
grid(True)
ylabel('histogram - gpu')

#and save it
savefig('plot-random-data')
28 changes: 8 additions & 20 deletions test/test_gpuarray_speed.py → examples/test_gpuarray_speed.py
100644 → 100755
@@ -53,7 +53,7 @@ def main():
secs = start.time_till(end)*1e-3

times.append(secs/count)
-        flops.append(size*4)
+        flops.append(size)

#cpu operations which adds two arrays
aCpu = numpy.random.randn(size).astype(numpy.float32)
@@ -77,33 +77,21 @@ def main():

#add results to variable
timesCPU.append(secs/count)
-        flopsCPU.append(size*4)
+        flopsCPU.append(size)


#calculate pseudo flops
flops = [f/t for f, t in zip(flops,times)]
flopsCPU = [f/t for f, t in zip(flopsCPU,timesCPU)]

#print the data out
-    try:
-        from matplotlib.pylab import semilogx, show, title
-    except ImportError:
-        from pytools import Table
-        tbl = Table()
-        tbl.add_row(("Size", "Time GPU", "Giga Flops GPU", "Time CPU","Giga Flops CPU"))
-        for s, t, f,tCpu,fCpu in zip(sizes, times, flops,timesCPU,flopsCPU):
-            tbl.add_row((s,t,f/1000000000,tCpu,fCpu/1000000000))
-        print tbl
-    else:
-        title("time to add two vectors")
-        semilogx(sizes, times)
-        show()
-        title("flops")
-        semilogx(sizes, flops)
-        show()
+    from pytools import Table
+    tbl = Table()
+    tbl.add_row(("Size", "Time GPU", "Size/Time GPU", "Time CPU","Size/Time CPU","GPU vs CPU speedup"))
+    for s, t, f,tCpu,fCpu in zip(sizes, times, flops,timesCPU,flopsCPU):
+        tbl.add_row((s,t,f,tCpu,fCpu,f/fCpu))
+    print tbl



94 changes: 94 additions & 0 deletions examples/test_gpuarray_speed_random.py
@@ -0,0 +1,94 @@
#! /usr/bin/env python
import pycuda.driver as drv
import numpy
import numpy.linalg as la
from pytools import Table




def main():
    drv.init()
    assert drv.Device.count() >= 1
    ctx = drv.Device(0).make_context()

    import pycuda.gpuarray as gpuarray

    # make sure all the kernels are compiled
    gpuarray.GPUArray.compile_kernels()
    print "done compiling"

    sizes = []
    times = []
    flops = []
    flopsCPU = []
    timesCPU = []

    for power in range(10, 25): # 24
        size = 1<<power
        print size
        sizes.append(size)
        a = gpuarray.zeros((size,), dtype=numpy.float32)

        if power > 20:
            count = 100
        else:
            count = 1000

        #start timer
        start = drv.Event()
        end = drv.Event()
        start.record()

        #cuda operation which fills the array with random numbers
        for i in range(count):
            a.randn()

        #stop timer
        end.record()
        end.synchronize()

        #calculate used time
        secs = start.time_till(end)*1e-3

        times.append(secs/count)
        flops.append(size)

        #allocate a cpu-side array of the same size
        a = numpy.empty(size, dtype=numpy.float32)

        #start timer
        start = drv.Event()
        end = drv.Event()
        start.record()

        #cpu operation which fills the array with random data
        for i in range(count):
            numpy.random.randn(size).astype(numpy.float32)

        #stop timer
        end.record()
        end.synchronize()

        #calculate used time
        secs = start.time_till(end)*1e-3

        #add results to variable
        timesCPU.append(secs/count)
        flopsCPU.append(size)


    #calculate pseudo flops
    flops = [f/t for f, t in zip(flops,times)]
    flopsCPU = [f/t for f, t in zip(flopsCPU,timesCPU)]

    #print the data out
    tbl = Table()
    tbl.add_row(("Size", "Time GPU", "Size/Time GPU", "Time CPU","Size/Time CPU","GPU vs CPU speedup"))
    for s, t, f,tCpu,fCpu in zip(sizes, times, flops,timesCPU,flopsCPU):
        tbl.add_row((s,t,f,tCpu,fCpu,f/fCpu))
    print tbl


if __name__ == "__main__":
    main()
144 changes: 144 additions & 0 deletions examples/test_math_speed.py
@@ -0,0 +1,144 @@
#! /usr/bin/env python
import pycuda.cumath as cuma
import pycuda._kernel as kernel
import pycuda.gpuarray as cuda
import pycuda.driver as drv
import types
import numpy
from pytools import Table

runs = 10

drv.init()
assert drv.Device.count() >= 1
ctx = drv.Device(0).make_context()


def time_cpu_execution(size,method,argumentCount):
    """times the execution time on the cpu"""

    start = drv.Event()
    end = drv.Event()
    start.record()

    a = numpy.zeros(size,numpy.float32)+1

    for x in range(runs):
        for i in range(size):
            if argumentCount == 1:
                method(a[i])
            if argumentCount == 2:
                method(a[i],2)

    #stop timer
    end.record()
    end.synchronize()

    #calculate used time
    secs = start.time_till(end)

    return secs

def time_gpu_execution(size,method,argumentCount):
    """times the execution time on the gpu"""
    start = drv.Event()
    end = drv.Event()
    start.record()

    a = cuda.array(size)+1

    for x in range(runs):
        if argumentCount == 1:
            method(a)
        if argumentCount == 2:
            method(a,2)

    #stop timer
    end.record()
    end.synchronize()

    #calculate used time
    secs = start.time_till(end)

    return secs

#iterate over all methods and time the execution time with different array sizes
print "compile kernels"
kernel._compile_kernels(kernel)

#generate our output tables, one for gpu, one for cpu, one for the speedup
tblCPU = Table()
tblGPU = Table()
tblSPD = Table()

#contains all the method names
methods = ["size"]

for name in dir(cuma):
    if not (name.startswith("__") and name.endswith("__")):
        method = getattr(cuma, name)

        if type(method) == types.FunctionType:
            methods.append(name)

tblCPU.add_row(methods)
tblGPU.add_row(methods)
tblSPD.add_row(methods)

#generate arrays with different sizes
for power in range(1,20):
    size = 1<<power

    #temp variables
    rowCPU = [size]
    rowGPU = [size]
    rowSPD = [size]

    print "calculating: ", size

    for name in dir(cuma):
        if not (name.startswith("__") and name.endswith("__")):

            method = getattr(cuma, name)

            if type(method) == types.FunctionType:
                code = method.func_code
                argCount = code.co_argcount

                gpu_time = time_gpu_execution(size,method,argCount)
                cpu_time = time_cpu_execution(size,method,argCount)

                rowCPU.append(str(cpu_time/runs)[0:7])
                rowGPU.append(str(gpu_time/runs)[0:7])

                speed_cpu = size/(cpu_time/runs)
                speed_gpu = size/(gpu_time/runs)
                rowSPD.append(str(speed_gpu/speed_cpu)[0:7])

    tblCPU.add_row(rowCPU)
    tblGPU.add_row(rowGPU)
    tblSPD.add_row(rowSPD)

print ""

print "GPU Times (ms)"

print ""

print tblGPU

print ""

print "CPU Times (ms)"

print ""
print tblCPU


print ""

print "GPU VS CPU"

print ""
print tblSPD

