Skip to content

Commit

Permalink
Removed unused data to reduce shared memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
mp3guy committed Mar 23, 2015
1 parent f52734a commit 81e8503
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 19 deletions.
16 changes: 4 additions & 12 deletions src/Cuda/icp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ __inline__ __device__ jtjjtr warpReduceSum(jtjjtr val)

val.residual += __shfl_down(val.residual, offset);
val.inliers += __shfl_down(val.inliers, offset);

-val.var0 += __shfl_down(val.var0, offset);
-val.var1 += __shfl_down(val.var1, offset);
-val.var2 += __shfl_down(val.var2, offset);
}

return val;
Expand All @@ -70,7 +66,7 @@ __inline__ __device__ jtjjtr blockReduceSum(jtjjtr val)
const jtjjtr zero = {0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0};
+0, 0, 0, 0, 0};

//ensure we only grab a value from shared memory if that warp existed
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : zero;
Expand All @@ -88,7 +84,7 @@ __global__ void reduceSum(jtjjtr * in, jtjjtr * out, int N)
jtjjtr sum = {0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0};
+0, 0, 0, 0, 0};

for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x)
{
Expand Down Expand Up @@ -253,11 +249,7 @@ struct ICPReduction
row[5] * row[6],

row[6] * row[6],
-found_coresp,
-
-0,
-0,
-0};
+found_coresp};

return values;
}
Expand All @@ -268,7 +260,7 @@ struct ICPReduction
jtjjtr sum = {0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0};
+0, 0, 0, 0, 0};

for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x)
{
Expand Down
7 changes: 0 additions & 7 deletions src/Cuda/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ struct jtjjtr
//Extra data needed (29)
float residual, inliers;

-//Spare data to round up (32)
-float var0, var1, var2;

__device__ inline void add(const jtjjtr & a)
{
aa += a.aa;
Expand Down Expand Up @@ -117,10 +114,6 @@ struct jtjjtr

residual += a.residual;
inliers += a.inliers;

-var0 += a.var0;
-var1 += a.var1;
-var2 += a.var2;
}
};

Expand Down

0 comments on commit 81e8503

Please sign in to comment.