Skip to content

Commit

Permalink
Experimental compute capability 2.0 support
Browse files Browse the repository at this point in the history
  • Loading branch information
mp3guy committed Mar 29, 2015
1 parent b1e0f48 commit 1815d03
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ file(GLOB srcs *.cpp)
file(GLOB cuda Cuda/*.cu)
file(GLOB containers Cuda/containers/*.cpp)

set(CUDA_ARCH_BIN "30 32 35 37 50 52" CACHE STRING "Specify 'real' GPU arch to build binaries for, BIN(PTX) format is supported. Example: 1.3 2.1(1.3) or 13 21(13)")
set(CUDA_ARCH_BIN "20 30 32 35 37 50 52" CACHE STRING "Specify 'real' GPU arch to build binaries for, BIN(PTX) format is supported. Example: 1.3 2.1(1.3) or 13 21(13)")
set(CUDA_ARCH_PTX "" CACHE STRING "Specify 'virtual' PTX arch to build PTX intermediate code for. Example: 1.0 1.2 or 10 12")

SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
Expand Down
16 changes: 15 additions & 1 deletion src/Cuda/icp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
#include "vector_math.hpp"
#include "containers/safe_call.hpp"

#if __CUDA_ARCH__ < 300
/**
 * \brief Shared-memory stand-in for the warp shuffle-down intrinsic, compiled
 *        only when __CUDA_ARCH__ is below 300 (or not defined).
 *
 * Every thread stores \p val into a block-wide shared buffer at index
 * threadIdx.x, waits at a block barrier, and then reads the slot that lies
 * \p offset entries above its own.
 *
 * \param val    value contributed by the calling thread
 * \param offset how many lanes above the caller the value is fetched from
 * \param width  size of the logical segment the exchange is confined to;
 *               expected to be a power of two no larger than 32, as for the
 *               hardware intrinsic
 * \return the value held by lane (caller + offset) of the same segment, or the
 *         caller's own \p val when that lane would fall outside the segment.
 *         Returning the caller's value (rather than 0) mirrors what the
 *         hardware shuffle does, so both code paths produce the same result
 *         in every lane.
 *
 * Preconditions:
 *  - indexes by threadIdx.x only, so the block is assumed to be one-dimensional
 *    with at most MAX_THREADS threads (larger blocks would write past the
 *    shared buffer);
 *  - NOTE(review): unlike the hardware intrinsic this function contains
 *    __syncthreads(), so it must be reached by every thread of the block.
 *    Call sites that shuffle inside a branch taken by only some warps need to
 *    be checked against this requirement.
 */
__inline__ __device__
float __shfl_down(float val, int offset, int width = 32)
{
    static __shared__ float shared[MAX_THREADS];

    // Position of this thread inside its width-sized segment. For the default
    // width of 32 this equals threadIdx.x % 32; the mask form relies on width
    // being a power of two and avoids a division by width.
    const int lane = static_cast<int>(threadIdx.x) & (width - 1);

    // Publish this thread's value, then wait until every slot is written.
    shared[threadIdx.x] = val;
    __syncthreads();

    // Only read when the source lane lies inside the caller's segment;
    // otherwise val is left untouched, i.e. the caller gets its own value back.
    if(lane + offset < width)
    {
        val = shared[threadIdx.x + offset];
    }

    // Second barrier: nobody may overwrite the buffer (in a subsequent call)
    // before all threads have finished reading from it.
    __syncthreads();

    return val;
}
#endif

__inline__ __device__ jtjjtr warpReduceSum(jtjjtr val)
{
for(int offset = warpSize / 2; offset > 0; offset /= 2)
Expand Down Expand Up @@ -331,7 +345,7 @@ void icpStep(const Mat33& Rcurr,

icpKernel<<<blocks, threads>>>(icp);

reduceSum<<<1, 1024>>>(sum, out, blocks);
reduceSum<<<1, MAX_THREADS>>>(sum, out, blocks);

cudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaDeviceSynchronize());
Expand Down
6 changes: 6 additions & 0 deletions src/Cuda/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@
#include <vector_types.h>
#include <cuda_runtime_api.h>

/** \brief Thread-count limit selected from the target architecture:
  *        512 when __CUDA_ARCH__ is below 300, 1024 otherwise.
  *
  * NOTE(review): __CUDA_ARCH__ is only defined while nvcc compiles device
  * code. In host code (including translation units built by the plain host
  * compiler) the macro is undefined, the preprocessor evaluates it as 0, and
  * the first branch is taken. Host-side users of MAX_THREADS (e.g. kernel
  * launch sizes or buffer allocations) therefore see 512 regardless of the
  * GPU that is present, while device code built for SM30+ sees 1024.
  * Confirm that this asymmetry is intended before relying on the 1024 value.
  */
#if __CUDA_ARCH__ < 300
#define MAX_THREADS 512
#else
#define MAX_THREADS 1024
#endif

/** \brief Camera intrinsics structure
*/
struct Intr
Expand Down
2 changes: 1 addition & 1 deletion src/ICPOdometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ICPOdometry::ICPOdometry(int width,
height(height),
cx(cx), cy(cy), fx(fx), fy(fy)
{
sumData.create(1024);
sumData.create(MAX_THREADS);
outData.create(1);

intr.cx = cx;
Expand Down

0 comments on commit 1815d03

Please sign in to comment.