Skip to content

Commit

Permalink
t
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 30, 2022
1 parent 7e328ce commit df7e227
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
23 changes: 12 additions & 11 deletions 09/01_texture/01/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,23 @@ __global__ void kernel(cudaTextureObject_t texVel, cudaSurfaceObject_t sufLoc, u
unsigned int z = threadIdx.z + blockDim.z * blockIdx.z;
if (x >= n || y >= n || z >= n) return;
float4 vel = tex3D<float4>(texVel, x, y, z);
-float4 loc = make_float4(x + 0.5f, y + 0.5f, z + 0.5f, 1.f) - vel;
-surf3Dwrite<float4>(loc, sufLoc, x, y, z, cudaBoundaryModeTrap);
+float4 loc = make_float4(x + 0.5f, y + 0.5f, z + 0.5f, 42.f) - vel;
+surf3Dwrite<float4>(loc, sufLoc, 1, 1, 1, cudaBoundaryModeTrap);
}

int main() {
unsigned int n = 2;

-auto arrLoc = CudaArray<float4>::make({{n, n, n}});
+auto arrLoc = CudaArray<float4>::make({{n, n, n}});//, cudaCreateChannelDesc(16, 16, 16, 16, cudaChannelFormatKindFloat)});
auto sufLoc = CudaSurface<float4>::make(arrLoc);
-auto arrVel = CudaArray<float4>::make({{n, n, n}});
+auto arrVel = CudaArray<float4>::make({{n, n, n}});//, cudaCreateChannelDesc(16, 16, 16, 16, cudaChannelFormatKindFloat)});
+auto sufVel = CudaSurface<float4>::make(arrVel);
auto texVel = CudaTexture<float4>::make(arrVel);

std::vector<float4> cpuVel(n * n * n);
-for (int z = 0; z < n; z++) {
-for (int y = 0; y < n; y++) {
-for (int x = 0; x < n; x++) {
+for (unsigned int z = 0; z < n; z++) {
+for (unsigned int y = 0; y < n; y++) {
+for (unsigned int x = 0; x < n; x++) {
cpuVel[x + n * (y + n * z)] = make_float4(1.f, 0.f, 0.f, 0.f);
}
}
Expand All @@ -40,11 +41,11 @@ int main() {

std::vector<float4> cpuLoc(n * n * n);
arrLoc.copyOut(cpuLoc.data());
-for (int z = 0; z < n; z++) {
-for (int y = 0; y < n; y++) {
-for (int x = 0; x < n; x++) {
+for (unsigned int z = 0; z < n; z++) {
+for (unsigned int y = 0; y < n; y++) {
+for (unsigned int x = 0; x < n; x++) {
float4 val = cpuLoc[x + n * (y + n * z)];
-printf("%d,%d,%d: %f,%f,%f,%f\n", x, y, z, val.x, val.y, val.z, val.w);
+printf("%u,%u,%u: %f,%f,%f,%f\n", x, y, z, val.x, val.y, val.z, val.w);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions 09/include/CudaArray.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class CudaArray {
struct BuildArgs {
std::array<unsigned int, 3> const dim{};
cudaChannelFormatDesc desc{cudaCreateChannelDesc<T>()}; // or cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned)
-int flags{0}; // or cudaArraySurfaceLoadStore
+int flags{cudaArraySurfaceLoadStore}; // or 0
};

struct Impl {
Expand All @@ -25,7 +25,7 @@ class CudaArray {

void copyIn(T const *_data) {
cudaMemcpy3DParms copy3DParams{};
-copy3DParams.srcPtr = make_cudaPitchedPtr((void *)_data, m_dim[0] * sizeof(T), m_dim[1], m_dim[2]);
+copy3DParams.srcPtr = make_cudaPitchedPtr((void *)_data, m_dim[0] * sizeof(T), m_dim[0], m_dim[1]);
copy3DParams.dstArray = m_cuArray;
copy3DParams.extent = make_cudaExtent(m_dim[0], m_dim[1], m_dim[2]);
copy3DParams.kind = cudaMemcpyHostToDevice;
Expand All @@ -35,7 +35,7 @@ class CudaArray {
void copyOut(T *_data) {
cudaMemcpy3DParms copy3DParams{};
copy3DParams.srcArray = m_cuArray;
-copy3DParams.dstPtr = make_cudaPitchedPtr((void *)_data, m_dim[0] * sizeof(T), m_dim[1], m_dim[2]);
+copy3DParams.dstPtr = make_cudaPitchedPtr((void *)_data, m_dim[0] * sizeof(T), m_dim[0], m_dim[1]);
copy3DParams.extent = make_cudaExtent(m_dim[0], m_dim[1], m_dim[2]);
copy3DParams.kind = cudaMemcpyDeviceToHost;
checkCudaErrors(cudaMemcpy3D(&copy3DParams));
Expand Down

0 comments on commit df7e227

Please sign in to comment.