forked from bryancatanzaro/damascene
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtexton.64.cu
125 lines (100 loc) · 4.27 KB
/
texton.64.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <cuda.h>
#include "filters.h"
#include "kmeans.h"
#include <cutil.h>
#include <stdio.h>
// 2D float texture sampled by convolve(); findTextons() binds it to a CUDA
// array holding a copy of the input image.
texture<float, 2, cudaReadModeElementType> image;
// 1D float texture over the filter coefficients; findTextons() binds it to
// linear device memory and convolve() reads it with tex1Dfetch.
texture<float, 1, cudaReadModeElementType> tex_coefficients;
// Per-filter window radius, stored in constant memory. findTextons() uses
// 17 filters per scale, so 170 entries cover up to 10 scales.
__device__ __constant__ int radii[170]; // can fit up to 10 scales!
/* __device__ __constant__ float coefficients[9010]; */
/*
__global__ void convolve(int filterCount, int nPixels, int width, int height, float* output) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if ((x < width) && (y < height)) {
int coefficientIndex = 0;
for(int filter = 0; filter < filterCount; filter++) {
int radius = radii[filter];
float result = 0.0f;
for(int compX = x - radius; compX <= x + radius; compX++) {
for(int compY = y - radius; compY <= y + radius; compY++) {
result += tex2D(image, compX, compY) * coefficients[coefficientIndex];
coefficientIndex++;
}
}
output[nPixels * filter + y * width + x] = result;
}
}
}
*/
/*
 * Applies every filter of the bank to the pixel owned by the calling thread.
 *
 * Launch layout: a 2D grid of 2D blocks; each thread maps to the pixel at
 * (blockIdx * blockDim + threadIdx) in x and y. Threads that fall outside
 * the width x height image do nothing.
 *
 * filterCount : number of filters to apply (one radii[] entry is read each).
 * nPixels     : stride, in floats, between consecutive filter planes in output.
 * width/height: image extent used for the bounds check and the row stride.
 * output      : receives filterCount planes; the response of filter f for
 *               pixel (x, y) is written to output[nPixels*f + y*width + x].
 *
 * Filter f covers a (2*radii[f]+1)^2 window centred on the pixel. The
 * coefficients are consumed sequentially from tex_coefficients, with the
 * window's x offset in the outer loop and the y offset in the inner loop,
 * and the running coefficient index carries over from one filter to the next.
 * Window samples outside the image are resolved by the addressing mode of
 * the `image` texture, which is not configured in this kernel.
 */
__global__ void convolve(int filterCount, int nPixels, int width, int height, float* output) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads in the ragged edge of the grid have no pixel to process.
    if (col >= width || row >= height) {
        return;
    }

    const int pixelOffset = row * width + col;
    int tap = 0;  // position in the flattened coefficient stream

    for (int f = 0; f < filterCount; ++f) {
        const int r = radii[f];
        float acc = 0.0f;

        for (int sx = col - r; sx <= col + r; ++sx) {
            for (int sy = row - r; sy <= row + r; ++sy) {
                acc += tex2D(image, sx, sy) * tex1Dfetch(tex_coefficients, tap);
                ++tap;
            }
        }

        output[nPixels * f + pixelOffset] = acc;
    }
}
/*
 * Builds the texton filter bank, convolves the image with every filter on
 * the GPU, and hands the per-pixel filter responses to kmeans().
 *
 * width, height : image dimensions in pixels.
 * devImage      : width*height floats. It is copied with
 *                 cudaMemcpyDeviceToDevice, so it must be a device pointer.
 * p_devTextons  : forwarded unchanged to kmeans(), which produces the result
 *                 through it (presumably a device array of per-pixel cluster
 *                 labels -- confirm against kmeans.h).
 *
 * The filter bank has 17 filters at each of 2 scales (34 filters) and the
 * responses are clustered into 64 clusters. Every temporary created here
 * (host filter data, device buffers, the image array, texture bindings) is
 * released before returning.
 */
void findTextons(int width, int height, float* devImage, int** p_devTextons) {
    int clusterCount = 64;
    int nPixels = width * height;

    // Filter bank description: 17 filters per scale.
    int nscales = 2;
    float* scales = new float[nscales];
    scales[0] = 2.0;
    scales[1] = 2.0*M_SQRT2;
    int filterCount = 17*nscales;

    float* hCoefficients = 0;
    int* hRadii = 0;
    int nFilterCoefficients;
    createTextonFilters(&hCoefficients, &nFilterCoefficients, &hRadii, scales, nscales);

    // One response plane of nPixels floats per filter.
    float* devResponses;
    CUDA_SAFE_CALL(cudaMalloc((void**)&devResponses, sizeof(float)*nPixels*filterCount));

    // radii[] holds 170 ints; filterCount is 34 here, so the copy fits.
    CUDA_SAFE_CALL(cudaMemcpyToSymbol(radii, hRadii, filterCount*sizeof(int)));

    // The coefficients are read through a 1D texture over linear device
    // memory (an earlier version kept them in constant memory instead).
    float* devcoefficients;
    CUDA_SAFE_CALL(cudaMalloc((void**)&devcoefficients, nFilterCoefficients* sizeof(float)));
    CUDA_SAFE_CALL(cudaMemcpy(devcoefficients, hCoefficients, nFilterCoefficients* sizeof(float), cudaMemcpyHostToDevice));
    cudaChannelFormatDesc coeffDesc = cudaCreateChannelDesc<float>();
    size_t offset = 0;
    CUDA_SAFE_CALL(cudaBindTexture(&offset, &tex_coefficients, devcoefficients, &coeffDesc, nFilterCoefficients*sizeof(float)));

    // The image is sampled through a 2D texture backed by a CUDA array.
    cudaArray* imageArray;
    cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();
    CUDA_SAFE_CALL(cudaMallocArray(&imageArray, &floatTex, width, height));
    CUDA_SAFE_CALL(cudaMemcpyToArray(imageArray, 0, 0, devImage, nPixels * sizeof(float), cudaMemcpyDeviceToDevice));
    CUDA_SAFE_CALL(cudaBindTextureToArray(image, imageArray));

    printf("Convolving\n");
    // Ceil-divide so partial blocks cover the right and bottom image edges.
    dim3 gridDim = dim3((width - 1)/XBLOCK + 1, (height - 1)/YBLOCK + 1);
    dim3 blockDim = dim3(XBLOCK, YBLOCK);
    convolve<<<gridDim, blockDim>>>(filterCount, nPixels, width, height, devResponses);
    // A kernel launch returns nothing; configuration errors only show up here.
    CUDA_SAFE_CALL(cudaGetLastError());

    kmeans(nPixels, width, height, clusterCount, filterCount, devResponses, p_devTextons);

    // Unbind before freeing so neither texture is left referring to released memory.
    CUDA_SAFE_CALL(cudaUnbindTexture(image));
    CUDA_SAFE_CALL(cudaUnbindTexture(tex_coefficients));
    CUDA_SAFE_CALL(cudaFreeArray(imageArray));
    CUDA_SAFE_CALL(cudaFree(devcoefficients));
    CUDA_SAFE_CALL(cudaFree(devResponses));

    // NOTE(review): free() assumes createTextonFilters allocates these with
    // malloc -- confirm against filters.h.
    free(hRadii);
    free(hCoefficients);
    delete[] scales;
}