Skip to content

Commit

Permalink
calc loss
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 31, 2022
1 parent 7601435 commit dac0bb0
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 19 deletions.
14 changes: 14 additions & 0 deletions 09/01_texture/01/CudaArray.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,17 @@ public:
return {m_impl->m_cuTex};
}
};

template <class T>
struct CudaAST {
CudaArray<T> arr;
CudaSurface<T> suf;
CudaTexture<T> tex;

CudaAST(ctor_t, typename CudaArray<T>::BuildArgs const &_arrArgs, typename CudaTexture<T>::BuildArgs const &_texArgs = {})
: arr(ctor, _arrArgs)
, suf(ctor, arr)
, tex(ctor, arr, _texArgs)
{
}
};
115 changes: 96 additions & 19 deletions 09/01_texture/01/main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,67 @@ __global__ void resample_kernel(CudaSurface<float4>::Accessor sufLoc, CudaTextur
sufClrNext.write(clr, x, y, z);
}

template <class T>
struct CudaAST {
CudaArray<T> arr;
CudaSurface<T> suf;
CudaTexture<T> tex;

CudaAST(ctor_t, typename CudaArray<T>::BuildArgs const &_arrArgs, typename CudaTexture<T>::BuildArgs const &_texArgs = {})
: arr(ctor, _arrArgs)
, suf(ctor, arr)
, tex(ctor, arr, _texArgs)
{
}
};
__global__ void divergence_kernel(CudaSurface<float4>::Accessor sufVel, CudaSurface<float>::Accessor sufDiv, unsigned int n) {
unsigned int x = threadIdx.x + blockDim.x * blockIdx.x;
unsigned int y = threadIdx.y + blockDim.y * blockIdx.y;
unsigned int z = threadIdx.z + blockDim.z * blockIdx.z;
if (x >= n || y >= n || z >= n) return;

float vxp = sufVel.read<cudaBoundaryModeClamp>(x + 1, y, z).x;
float vxn = sufVel.read<cudaBoundaryModeClamp>(x - 1, y, z).x;
float vyp = sufVel.read<cudaBoundaryModeClamp>(x, y + 1, z).y;
float vyn = sufVel.read<cudaBoundaryModeClamp>(x, y - 1, z).y;
float vzp = sufVel.read<cudaBoundaryModeClamp>(x, y, z + 1).z;
float vzn = sufVel.read<cudaBoundaryModeClamp>(x, y, z - 1).z;
float div = (vxp - vxn + vyp - vyn + vzp - vzn) * 0.5f;
sufDiv.write(div, x, y, z);
}

__global__ void sumloss_kernel(CudaSurface<float>::Accessor sufDiv, float *sum, unsigned int n) {
unsigned int x = threadIdx.x + blockDim.x * blockIdx.x;
unsigned int y = threadIdx.y + blockDim.y * blockIdx.y;
unsigned int z = threadIdx.z + blockDim.z * blockIdx.z;
if (x >= n || y >= n || z >= n) return;

float div = sufDiv.read(x, y, z);
atomicAdd(sum, div * div);
}

__global__ void jacobi_kernel(CudaSurface<float>::Accessor sufDiv, CudaSurface<float>::Accessor sufPre, CudaSurface<float>::Accessor sufPreNext, unsigned int n) {
unsigned int x = threadIdx.x + blockDim.x * blockIdx.x;
unsigned int y = threadIdx.y + blockDim.y * blockIdx.y;
unsigned int z = threadIdx.z + blockDim.z * blockIdx.z;
if (x >= n || y >= n || z >= n) return;

float pxp = sufPre.read<cudaBoundaryModeClamp>(x + 1, y, z);
float pxn = sufPre.read<cudaBoundaryModeClamp>(x - 1, y, z);
float pyp = sufPre.read<cudaBoundaryModeClamp>(x, y + 1, z);
float pyn = sufPre.read<cudaBoundaryModeClamp>(x, y - 1, z);
float pzp = sufPre.read<cudaBoundaryModeClamp>(x, y, z + 1);
float pzn = sufPre.read<cudaBoundaryModeClamp>(x, y, z - 1);
float div = sufDiv.read(x, y, z);
float preNext = (pxp + pxn + pyp + pyn + pzp + pzn - div) * (1.f / 6.f);
sufPreNext.write(preNext, x, y, z);
}

__global__ void subgradient_kernel(CudaSurface<float>::Accessor sufPre, CudaSurface<float4>::Accessor sufVel, unsigned int n) {
unsigned int x = threadIdx.x + blockDim.x * blockIdx.x;
unsigned int y = threadIdx.y + blockDim.y * blockIdx.y;
unsigned int z = threadIdx.z + blockDim.z * blockIdx.z;
if (x >= n || y >= n || z >= n) return;

float pxp = sufPre.read<cudaBoundaryModeClamp>(x + 1, y, z);
float pxn = sufPre.read<cudaBoundaryModeClamp>(x - 1, y, z);
float pyp = sufPre.read<cudaBoundaryModeClamp>(x, y + 1, z);
float pyn = sufPre.read<cudaBoundaryModeClamp>(x, y - 1, z);
float pzp = sufPre.read<cudaBoundaryModeClamp>(x, y, z + 1);
float pzn = sufPre.read<cudaBoundaryModeClamp>(x, y, z - 1);
float4 vel = sufVel.read(x, y, z);
vel.x -= 0.5f * (pxp - pxn);
vel.y -= 0.5f * (pyp - pyn);
vel.z -= 0.5f * (pzp - pzn);
sufVel.write(vel, x, y, z);
}

struct SmokeSim {
nocopy_t nocopy;
Expand All @@ -52,14 +100,20 @@ struct SmokeSim {
CudaAST<float4> velNext;
CudaAST<float4> clr;
CudaAST<float4> clrNext;
CudaAST<float> div;
CudaAST<float> pre;
CudaAST<float> preNext;

SmokeSim(unsigned int _n)
SmokeSim(ctor_t, unsigned int _n)
: n(_n)
, loc(ctor, {{n, n, n}})
, vel(ctor, {{n, n, n}})
, velNext(ctor, {{n, n, n}})
, clr(ctor, {{n, n, n}})
, clrNext(ctor, {{n, n, n}})
, div(ctor, {{n, n, n}})
, pre(ctor, {{n, n, n}})
, preNext(ctor, {{n, n, n}})
{}

void advection() {
Expand All @@ -70,19 +124,40 @@ struct SmokeSim {
std::swap(vel, velNext);
std::swap(clr, clrNext);
}

void projection(int times = 400) {
divergence_kernel<<<dim3((n + 7) / 8, (n + 7) / 8, (n + 7) / 8), dim3(8, 8, 8)>>>(vel.suf.access(), div.suf.access(), n);

for (int step = 0; step < times; step++) {
jacobi_kernel<<<dim3((n + 7) / 8, (n + 7) / 8, (n + 7) / 8), dim3(8, 8, 8)>>>(div.suf.access(), pre.suf.access(), preNext.suf.access(), n);
std::swap(pre, preNext);
}

subgradient_kernel<<<dim3((n + 7) / 8, (n + 7) / 8, (n + 7) / 8), dim3(8, 8, 8)>>>(pre.suf.access(), vel.suf.access(), n);
}

float calc_loss() {
divergence_kernel<<<dim3((n + 7) / 8, (n + 7) / 8, (n + 7) / 8), dim3(8, 8, 8)>>>(vel.suf.access(), div.suf.access(), n);
float *sum;
checkCudaErrors(cudaMalloc(&sum, sizeof(float)));
sumloss_kernel<<<dim3((n + 7) / 8, (n + 7) / 8, (n + 7) / 8), dim3(8, 8, 8)>>>(div.suf.access(), sum, n);
float cpu;
checkCudaErrors(cudaMemcpy(&cpu, sum, sizeof(float), cudaMemcpyDeviceToHost));
checkCudaErrors(cudaFree(sum));
return cpu;
}
};

int main() {
unsigned int n = 64;

SmokeSim sim(n);
SmokeSim sim(ctor, n);

{
std::vector<float4> cpu(n * n * n);
for (unsigned int z = 0; z < n; z++) {
for (unsigned int y = 0; y < n; y++) {
for (unsigned int x = 0; x < n; x++) {
float den = std::hypot((int)x - (int)n / 2, (int)y - (int)n / 2, (int)z - (int)n / 2) < n / 2 ? 1.f : 0.f;
float den = std::hypot((int)x - (int)n / 2, (int)y - (int)n / 2, (int)z - (int)n / 2) < n / 3 ? 1.f : 0.f;
cpu[x + n * (y + n * z)] = make_float4(den, 0.f, 0.f, 0.f);
}
}
Expand All @@ -95,7 +170,8 @@ int main() {
for (unsigned int z = 0; z < n; z++) {
for (unsigned int y = 0; y < n; y++) {
for (unsigned int x = 0; x < n; x++) {
cpu[x + n * (y + n * z)] = make_float4(0.1f, 0.f, 0.f, 0.f);
float vel = std::hypot((int)x - (int)n / 2, (int)y - (int)n / 2, (int)z - (int)n / 2) < n / 3 ? 0.5f : 0.f;
cpu[x + n * (y + n * z)] = make_float4(vel, 0.f, 0.f, 0.f);
}
}
}
Expand All @@ -107,8 +183,9 @@ int main() {
sim.clr.arr.copyOut(cpu.data());
writevdb<float, 1>("/tmp/a" + std::to_string(1000 + frame).substr(1) + ".vdb", cpu.data(), n, n, n, sizeof(float4));

printf("frame=%d\n", frame);
printf("frame=%d, loss=%f\n", frame, sim.calc_loss());
sim.advection();
sim.projection();
}

return 0;
Expand Down

0 comments on commit dac0bb0

Please sign in to comment.