From 2872df01f4d12c0f92b953329e35d9d4296ae94a Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Thu, 3 Feb 2022 14:01:50 +0800 Subject: [PATCH] fix div --- 09/01_texture/08/main.cu | 111 ++++++++++++++------------------------- 1 file changed, 39 insertions(+), 72 deletions(-) diff --git a/09/01_texture/08/main.cu b/09/01_texture/08/main.cu index e72f823b..c9973bd0 100644 --- a/09/01_texture/08/main.cu +++ b/09/01_texture/08/main.cu @@ -19,7 +19,7 @@ sufClr.write(clr, x, y, z); }*/ -__global__ void advect_kernel(CudaTextureAccessor texVel, CudaSurfaceAccessor sufLoc, CudaSurfaceAccessor sufBound, unsigned int n) { +__global__ void advect_kernel(CudaTextureAccessor texVel, CudaSurfaceAccessor sufLoc, unsigned int n) { int x = threadIdx.x + blockDim.x * blockIdx.x; int y = threadIdx.y + blockDim.y * blockIdx.y; int z = threadIdx.z + blockDim.z * blockIdx.z; @@ -31,27 +31,35 @@ __global__ void advect_kernel(CudaTextureAccessor texVel, CudaSurfaceAcc }; float3 loc = make_float3(x + 0.5f, y + 0.5f, z + 0.5f); - if (sufBound.read(x, y, z) >= 0) { - float3 vel1 = sample(texVel, loc); - float3 vel2 = sample(texVel, loc - 0.5f * vel1); - float3 vel3 = sample(texVel, loc - 0.75f * vel2); - loc -= (2.f / 9.f) * vel1 + (1.f / 3.f) * vel2 + (4.f / 9.f) * vel3; - } + float3 vel1 = sample(texVel, loc); + float3 vel2 = sample(texVel, loc - 0.5f * vel1); + float3 vel3 = sample(texVel, loc - 0.75f * vel2); + loc -= (2.f / 9.f) * vel1 + (1.f / 3.f) * vel2 + (4.f / 9.f) * vel3; sufLoc.write(make_float4(loc.x, loc.y, loc.z, 0.f), x, y, z); } -__global__ void resample_kernel(CudaSurfaceAccessor sufLoc, CudaTextureAccessor texClr, CudaSurfaceAccessor sufClrNext, unsigned int n) { +__global__ void resample_kernel(CudaSurfaceAccessor sufLoc, CudaTextureAccessor texClr, CudaSurfaceAccessor sufClrNext, CudaSurfaceAccessor sufBound, unsigned int n) { int x = threadIdx.x + blockDim.x * blockIdx.x; int y = threadIdx.y + blockDim.y * blockIdx.y; int z = threadIdx.z + blockDim.z * blockIdx.z; if (x >= n || y >= n || z >= n) return; float4 loc = sufLoc.read(x, y, z); - float4 clr = texClr.sample(loc.x, loc.y, loc.z); + + float bound = sufBound.read(x, y, z); + float4 clr; + if (bound <= 0) { + clr = texClr.sample(loc.x, loc.y, loc.z); + } else if (bound >= 1) { + clr = texClr.sample(x + 0.5f, y + 0.5f, z + 0.5f); + } else { + clr = texClr.sample(x + 0.5f, y + 0.5f, z + 0.5f) * (1 - bound) + bound * texClr.sample(loc.x, loc.y, loc.z); + } + sufClrNext.write(clr, x, y, z); } -__global__ void divergence_kernel(CudaSurfaceAccessor sufVel, CudaSurfaceAccessor sufDiv, unsigned int n) { +__global__ void divergence_kernel(CudaSurfaceAccessor sufVel, CudaSurfaceAccessor sufDiv, CudaSurfaceAccessor sufBound, unsigned int n) { int x = threadIdx.x + blockDim.x * blockIdx.x; int y = threadIdx.y + blockDim.y * blockIdx.y; int z = threadIdx.z + blockDim.z * blockIdx.z; @@ -64,6 +72,10 @@ __global__ void divergence_kernel(CudaSurfaceAccessor sufVel, CudaSurfac float vyn = sufVel.read(x, y - 1, z).y; float vzn = sufVel.read(x, y, z - 1).z; float div = (vxp - vxn + vyp - vyn + vzp - vzn) * 0.5f; + + float bound = sufBound.read(x, y, z); + div *= 1 - bound; + sufDiv.write(div, x, y, z); } @@ -100,7 +112,9 @@ __global__ void subgradient_kernel(CudaSurfaceAccessor sufPre, CudaSurfac int y = threadIdx.y + blockDim.y * blockIdx.y; int z = threadIdx.z + blockDim.z * blockIdx.z; if (x >= n || y >= n || z >= n) return; - if (sufBound.read(x, y, z) < 0) return; + + float bound = sufBound.read(x, y, z); + float fac = 0.5f * (1 - bound); float pxn = sufPre.read(x - 1, y, z); float pyn = sufPre.read(x, y - 1, z); @@ -109,9 +123,9 @@ __global__ void subgradient_kernel(CudaSurfaceAccessor sufPre, CudaSurfac float pyp = sufPre.read(x, y + 1, z); float pzp = sufPre.read(x, y, z + 1); float4 vel = sufVel.read(x, y, z); - vel.x -= (pxp - pxn) * 0.5f; - vel.y -= (pyp - pyn) * 0.5f; - vel.z -= (pzp - pzn) * 0.5f; + vel.x -= (pxp - pxn) * fac; + vel.y -= (pyp - pyn) * fac; + vel.z -= (pzp - pzn) * fac; sufVel.write(vel, x, y, z); } @@ -134,45 +148,6 @@ __global__ void rbgs_kernel(CudaSurfaceAccessor sufPre, CudaSurfaceAccess sufPre.write(preNext, x, y, z); } -template -__global__ void boundrbgs_kernel(CudaSurfaceAccessor sufPre, CudaSurfaceAccessor sufDiv, CudaSurfaceAccessor sufBound, unsigned int n) { - int x = threadIdx.x + blockDim.x * blockIdx.x; - int y = threadIdx.y + blockDim.y * blockIdx.y; - int z = threadIdx.z + blockDim.z * blockIdx.z; - if (x >= n || y >= n || z >= n) return; - if ((x + y + z) % 2 != phase) return; - - float pxp = sufPre.read(x + 1, y, z); - float pxn = sufPre.read(x - 1, y, z); - float pyp = sufPre.read(x, y + 1, z); - float pyn = sufPre.read(x, y - 1, z); - float pzp = sufPre.read(x, y, z + 1); - float pzn = sufPre.read(x, y, z - 1); - float div = sufDiv.read(x, y, z); - float preNext = (pxp + pxn + pyp + pyn + pzp + pzn - div) * (1.f / 6.f); - if (sufBound.read(x, y, z) < 0) preNext += div * (1.f / 6.f); - sufPre.write(preNext, x, y, z); -} - -__global__ void boundres_kernel(CudaSurfaceAccessor sufRes, CudaSurfaceAccessor sufPre, CudaSurfaceAccessor sufDiv, CudaSurfaceAccessor sufBound, unsigned int n) { - int x = threadIdx.x + blockDim.x * blockIdx.x; - int y = threadIdx.y + blockDim.y * blockIdx.y; - int z = threadIdx.z + blockDim.z * blockIdx.z; - if (x >= n || y >= n || z >= n) return; - - float pxp = sufPre.read(x + 1, y, z); - float pxn = sufPre.read(x - 1, y, z); - float pyp = sufPre.read(x, y + 1, z); - float pyn = sufPre.read(x, y - 1, z); - float pzp = sufPre.read(x, y, z + 1); - float pzn = sufPre.read(x, y, z - 1); - float pre = sufPre.read(x, y, z); - float div = sufDiv.read(x, y, z); - float res = pxp + pxn + pyp + pyn + pzp + pzn - 6.f * pre - div; - if (sufBound.read(x, y, z) < 0) res += div; - sufRes.write(res, x, y, z); -} - __global__ void residual_kernel(CudaSurfaceAccessor sufRes, CudaSurfaceAccessor sufPre, CudaSurfaceAccessor sufDiv, unsigned int n) { int x = threadIdx.x + blockDim.x * blockIdx.x; int y = threadIdx.y + blockDim.y * blockIdx.y; @@ -282,13 +257,8 @@ struct SmokeSim : DisableCopy { void smooth(CudaSurface *v, CudaSurface *f, unsigned int lev, int times = 4) { unsigned int tn = sizes[lev]; for (int step = 0; step < times; step++) { - if (lev == 0) { - boundrbgs_kernel<0><<>>(v->accessSurface(), f->accessSurface(), bound->accessSurface(), tn); - boundrbgs_kernel<1><<>>(v->accessSurface(), f->accessSurface(), bound->accessSurface(), tn); - } else { - rbgs_kernel<0><<>>(v->accessSurface(), f->accessSurface(), tn); - rbgs_kernel<1><<>>(v->accessSurface(), f->accessSurface(), tn); - } + rbgs_kernel<0><<>>(v->accessSurface(), f->accessSurface(), tn); + rbgs_kernel<1><<>>(v->accessSurface(), f->accessSurface(), tn); } } @@ -303,11 +273,7 @@ struct SmokeSim : DisableCopy { auto *e2 = err2[lev].get(); unsigned int tn = sizes[lev]; smooth(v, f, lev); - if (lev == 0) { - boundres_kernel<<>>(r->accessSurface(), v->accessSurface(), f->accessSurface(), bound->accessSurface(), tn); - } else { - residual_kernel<<>>(r->accessSurface(), v->accessSurface(), f->accessSurface(), tn); - } + residual_kernel<<>>(r->accessSurface(), v->accessSurface(), f->accessSurface(), tn); restrict_kernel<<>>(r2->accessSurface(), r->accessSurface(), tn/2); fillzero_kernel<<>>(e2->accessSurface(), tn/2); vcycle(lev + 1, e2, r2); @@ -316,15 +282,15 @@ struct SmokeSim : DisableCopy { } void projection() { - divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), n); + divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), bound->accessSurface(), n); vcycle(0, pre.get(), div.get()); subgradient_kernel<<>>(pre->accessSurface(), vel->accessSurface(), bound->accessSurface(), n); } void advection() { - advect_kernel<<>>(vel->accessTexture(), loc->accessSurface(), bound->accessSurface(), n); - resample_kernel<<>>(loc->accessSurface(), clr->accessTexture(), clrNext->accessSurface(), n); - resample_kernel<<>>(loc->accessSurface(), vel->accessTexture(), velNext->accessSurface(), n); + advect_kernel<<>>(vel->accessTexture(), loc->accessSurface(), n); + resample_kernel<<>>(loc->accessSurface(), clr->accessTexture(), clrNext->accessSurface(), bound->accessSurface(), n); + resample_kernel<<>>(loc->accessSurface(), vel->accessTexture(), velNext->accessSurface(), bound->accessSurface(), n); std::swap(vel, velNext); std::swap(clr, clrNext); @@ -339,7 +305,7 @@ struct SmokeSim : DisableCopy { } /*void old_projection(int times = 50) { - divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), n); + divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), bound->accessSurface(), n); for (int step = 0; step < times; step++) { jacobi_kernel<<>>(div->accessSurface(), pre->accessSurface(), preNext->accessSurface(), bound->accessSurface(), n); @@ -350,7 +316,7 @@ struct SmokeSim : DisableCopy { }*/ float calc_loss() { - divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), n); + divergence_kernel<<>>(vel->accessSurface(), div->accessSurface(), bound->accessSurface(), n); float *sum; checkCudaErrors(cudaMalloc(&sum, sizeof(float))); sumloss_kernel<<>>(div->accessSurface(), sum, n); @@ -372,7 +338,8 @@ int main() { for (int x = 0; x < n; x++) { float sdf1 = std::hypot(x - (int)n / 2, y - (int)n / 2, z - (int)n / 4) - n / 12; float sdf2 = std::hypot(x - (int)n / 2, y - (int)n / 2, z - (int)n * 3 / 4) - n / 6; - cpu[x + n * (y + n * z)] = std::min(sdf1, sdf2); + float sdf = std::min(sdf1, sdf2); + cpu[x + n * (y + n * z)] = std::max(0.f, std::min(1.f, -sdf)); } } }