forked from gorgonia/cu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbatchedPatterns.go
74 lines (63 loc) · 2.08 KB
/
batchedPatterns.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package cu
// #include <cuda.h>
// #include "batch.h"
import "C"
import (
"unsafe"
"github.com/pkg/errors"
)
/* COMMON PATTERNS */
// Attributes gets multiple attributes as provided.
//
// The returned slice has one entry per requested attribute. When no attributes
// are requested it returns (nil, nil) without calling into the driver. When the
// driver call fails, the error is returned with a nil slice, so callers never
// see values from a failed query.
func (dev Device) Attributes(attrs ...DeviceAttribute) ([]int, error) {
	if len(attrs) == 0 {
		return nil, nil
	}

	// Mirror the requested attributes into a C-typed slice for the batched call.
	cAttrs := make([]C.CUdevice_attribute, len(attrs))
	for i, v := range attrs {
		cAttrs[i] = C.CUdevice_attribute(v)
	}
	cRetVal := make([]C.int, len(attrs))
	size := C.int(len(attrs))

	if err := result(C.cuDeviceGetAttributes(&cRetVal[0], &cAttrs[0], size, C.CUdevice(dev))); err != nil {
		// The output buffer may be only partially written (or untouched) on
		// failure, so do not hand it back alongside the error.
		return nil, err
	}

	retVal := make([]int, len(cRetVal))
	for i, v := range cRetVal {
		retVal[i] = int(v)
	}
	return retVal, nil
}
// LaunchAndSync launches the kernel and synchronizes the context by calling
// the C helper cuLaunchAndSync.
//
// kernelParams holds one pointer per kernel argument. Each pointed-to argument
// is read as a 64-bit value and copied into C-allocated memory before the
// launch; both temporary C buffers are freed when the function returns.
// NOTE(review): presumably every argument must therefore be at least 8 readable
// bytes and non-nil — confirm against callers.
func (fn Function) LaunchAndSync(gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes int, stream Stream, kernelParams []unsafe.Pointer) error {
	// Two C-side tables with one slot per parameter: values receives a copy of
	// each argument, slots receives the address of the matching values entry.
	tableBytes := C.size_t(len(kernelParams) * pointerSize)
	values := C.malloc(tableBytes)
	slots := C.malloc(tableBytes)
	defer C.free(values)
	defer C.free(slots)

	for i, param := range kernelParams {
		valueSlot := offset(values, i)
		*(*uint64)(valueSlot) = *(*uint64)(param)        // values[i] = *kernelParams[i]
		*(*unsafe.Pointer)(offset(slots, i)) = valueSlot // slots[i] = &values[i]
	}

	kernel := C.CUfunction(unsafe.Pointer(uintptr(fn)))
	cuStream := C.CUstream(unsafe.Pointer(uintptr(stream)))

	// The final argument of cuLaunchAndSync is passed as a null pointer.
	var extra *unsafe.Pointer

	return result(C.cuLaunchAndSync(
		kernel,
		C.uint(gridDimX),
		C.uint(gridDimY),
		C.uint(gridDimZ),
		C.uint(blockDimX),
		C.uint(blockDimY),
		C.uint(blockDimZ),
		C.uint(sharedMemBytes),
		cuStream,
		(*unsafe.Pointer)(slots),
		extra))
}
// AllocAndCopy abstracts away the common pattern of allocating and then copying a Go slice to the GPU.
//
// p is the source pointer handed to the C helper cuAllocAndCopy and bytesize is
// the number of bytes requested. A bytesize of zero or less is rejected with an
// error wrapping InvalidValue before any C call is made. On success the
// resulting device pointer is returned; on failure the returned DevicePtr is 0
// and the error is wrapped with "AllocAndCopy".
func AllocAndCopy(p unsafe.Pointer, bytesize int64) (DevicePtr, error) {
	// Reject negative sizes as well as zero: a negative int64 would wrap around
	// to an enormous value when converted to C.size_t below.
	if bytesize <= 0 {
		return 0, errors.Wrapf(InvalidValue, "Cannot allocate memory with size %d", bytesize)
	}

	var d C.CUdeviceptr
	if err := result(C.cuAllocAndCopy(&d, p, C.size_t(bytesize))); err != nil {
		return 0, errors.Wrapf(err, "AllocAndCopy")
	}
	return DevicePtr(d), nil
}