Skip to content

Commit 346d694

Browse files
committed
ccv bench fixes for latest ccv
1 parent 02be517 commit 346d694

File tree

2 files changed

+57
-71
lines changed

2 files changed

+57
-71
lines changed

ccv/cwc-bench-runtime.cu

+48 -63
Original file line number | Diff line number | Diff line change
@@ -11,28 +11,13 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
1111
{
1212
int batch = params.mini_batch;
1313
int i;
14+
const int device_id = 0;
1415
_cwc_convnet_alloc_reserved_both(convnet, batch, 0, params.layer_params);
15-
int out_rows, out_cols, out_partition;
16-
for (i = 0; i < convnet->count; i++)
17-
{
18-
ccv_convnet_layer_t *layer = convnet->layers + i;
19-
_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
20-
switch (layer->type)
21-
{
22-
case CCV_CONVNET_CONVOLUTIONAL:
23-
cudaFree(GPU(convnet)->device[0].forwards[i]);
24-
GPU(convnet)->device[0].forwards[i] = 0;
25-
// since for the benchmark, output doesn't match input, this chooses the maximum ones to allocate
26-
cudaMalloc(&GPU(convnet)->device[0].forwards[i], sizeof(float) * ccv_max(out_rows * out_cols * layer->net.convolutional.count, layer->input.matrix.rows * layer->input.matrix.cols * layer->input.matrix.channels) * batch);
27-
assert(GPU(convnet)->device[0].forwards[i]);
28-
break;
29-
}
30-
}
3116
cwc_convnet_context_t* context = GPU(convnet)->contexts;
3217
for (i = 0; i < convnet->rows * convnet->cols * convnet->channels; i++)
3318
convnet->mean_activity->data.f32[i] = 128;
34-
_cwc_convnet_batch_formation(0, categorizeds, convnet->mean_activity, 0, 0, 0, 0, ccv_size(128, 128), convnet->rows, convnet->cols, convnet->channels, 1000, 0, batch, 0, batch, context->host[0].input, context->host[0].c);
35-
cudaMemcpy(context->device[0].input, context->host[0].input, sizeof(float) * convnet->rows * convnet->cols * convnet->channels * batch, cudaMemcpyHostToDevice);
19+
cwc_convnet_batch_formation(0, categorizeds, convnet->mean_activity, 0, 0, 0, 0, 0, ccv_size(128, 128), 128, 128, convnet->rows, convnet->cols, convnet->channels, 1000, 0, batch, 0, batch, context->host[device_id].input, context->host[device_id].c);
20+
cudaMemcpy(context->device[device_id].input, context->host[device_id].input, sizeof(float) * convnet->rows * convnet->cols * convnet->channels * batch, cudaMemcpyHostToDevice);
3621

3722
cudaEvent_t overallStart;
3823
cudaEvent_t overallStop;
@@ -43,21 +28,21 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
4328
cudaEventCreate(&start);
4429
cudaEventCreate(&stop);
4530
float elapsed_time;
46-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.forward.x = 4;
47-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.forward.y = 8;
48-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.forward.z = 32;
49-
VARY(GPU(convnet)->device[0].layers + 1)->convolutional.forward.x = 4;
50-
VARY(GPU(convnet)->device[0].layers + 1)->convolutional.forward.y = 8;
51-
VARY(GPU(convnet)->device[0].layers + 1)->convolutional.forward.z = 32;
52-
VARY(GPU(convnet)->device[0].layers + 2)->convolutional.forward.x = 4;
53-
VARY(GPU(convnet)->device[0].layers + 2)->convolutional.forward.y = 8;
54-
VARY(GPU(convnet)->device[0].layers + 2)->convolutional.forward.z = 32;
55-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.forward.x = 4;
56-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.forward.y = 8;
57-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.forward.z = 32;
58-
VARY(GPU(convnet)->device[0].layers + 4)->convolutional.forward.x = 4;
59-
VARY(GPU(convnet)->device[0].layers + 4)->convolutional.forward.y = 8;
60-
VARY(GPU(convnet)->device[0].layers + 4)->convolutional.forward.z = 32;
31+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.forward.x = 4;
32+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.forward.y = 8;
33+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.forward.z = 32;
34+
EXTRA(GPU(convnet)->device[0].layers + 1)->vary.convolutional.forward.x = 4;
35+
EXTRA(GPU(convnet)->device[0].layers + 1)->vary.convolutional.forward.y = 8;
36+
EXTRA(GPU(convnet)->device[0].layers + 1)->vary.convolutional.forward.z = 32;
37+
EXTRA(GPU(convnet)->device[0].layers + 2)->vary.convolutional.forward.x = 4;
38+
EXTRA(GPU(convnet)->device[0].layers + 2)->vary.convolutional.forward.y = 8;
39+
EXTRA(GPU(convnet)->device[0].layers + 2)->vary.convolutional.forward.z = 32;
40+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.forward.x = 4;
41+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.forward.y = 8;
42+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.forward.z = 32;
43+
EXTRA(GPU(convnet)->device[0].layers + 4)->vary.convolutional.forward.x = 4;
44+
EXTRA(GPU(convnet)->device[0].layers + 4)->vary.convolutional.forward.y = 8;
45+
EXTRA(GPU(convnet)->device[0].layers + 4)->vary.convolutional.forward.z = 32;
6146
cudaEventRecord(overallStart, context->device[0].data_stream);
6247
for (i = 0; i < convnet->count; i++)
6348
{
@@ -68,7 +53,7 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
6853
cudaEventSynchronize(stop);
6954
cudaEventElapsedTime(&elapsed_time, start, stop);
7055
if (layer->type == CCV_CONVNET_CONVOLUTIONAL)
71-
printf("%d %d %d, elapsed time for layer %d fprop: %f milliseconds\n", VARY(layer)->convolutional.forward.x, VARY(layer)->convolutional.forward.y, VARY(layer)->convolutional.forward.z, i + 1, elapsed_time);
56+
printf("%d %d %d, elapsed time for layer %d fprop: %f milliseconds\n", EXTRA(layer)->vary.convolutional.forward.x, EXTRA(layer)->vary.convolutional.forward.y, EXTRA(layer)->vary.convolutional.forward.z, i + 1, elapsed_time);
7257
else
7358
printf("elapsed time for layer %d fprop: %f milliseconds\n", i + 1, elapsed_time);
7459
}
@@ -78,33 +63,33 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
7863
printf("forward pass %f milliseconds\n", elapsed_time);
7964

8065
/*
81-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.backward.coefficient.x = 1;
82-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.backward.coefficient.y = 3;
83-
VARY(GPU(convnet)->device[0].layers + 0)->convolutional.backward.coefficient.z = 1;
84-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.coefficient.x = 4;
85-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.coefficient.y = 4;
86-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.coefficient.z = 16;
87-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.gradient.x = 4;
88-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.gradient.y = 6;
89-
VARY(GPU(convnet)->device[0].layers + 3)->convolutional.backward.gradient.z = 24;
90-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.coefficient.x = 8;
91-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.coefficient.y = 3;
92-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.coefficient.z = 32;
93-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.gradient.x = 4;
94-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.gradient.y = 8;
95-
VARY(GPU(convnet)->device[0].layers + 6)->convolutional.backward.gradient.z = 32;
96-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.coefficient.x = 8;
97-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.coefficient.y = 3;
98-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.coefficient.z = 32;
99-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.gradient.x = 4;
100-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.gradient.y = 8;
101-
VARY(GPU(convnet)->device[0].layers + 7)->convolutional.backward.gradient.z = 32;
102-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.coefficient.x = 8;
103-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.coefficient.y = 4;
104-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.coefficient.z = 32;
105-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.gradient.x = 4;
106-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.gradient.y = 8;
107-
VARY(GPU(convnet)->device[0].layers + 8)->convolutional.backward.gradient.z = 32;
66+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.backward.coefficient.x = 1;
67+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.backward.coefficient.y = 3;
68+
EXTRA(GPU(convnet)->device[0].layers + 0)->vary.convolutional.backward.coefficient.z = 1;
69+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.coefficient.x = 4;
70+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.coefficient.y = 4;
71+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.coefficient.z = 16;
72+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.gradient.x = 4;
73+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.gradient.y = 6;
74+
EXTRA(GPU(convnet)->device[0].layers + 3)->vary.convolutional.backward.gradient.z = 24;
75+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.coefficient.x = 8;
76+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.coefficient.y = 3;
77+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.coefficient.z = 32;
78+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.gradient.x = 4;
79+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.gradient.y = 8;
80+
EXTRA(GPU(convnet)->device[0].layers + 6)->vary.convolutional.backward.gradient.z = 32;
81+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.coefficient.x = 8;
82+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.coefficient.y = 3;
83+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.coefficient.z = 32;
84+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.gradient.x = 4;
85+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.gradient.y = 8;
86+
EXTRA(GPU(convnet)->device[0].layers + 7)->vary.convolutional.backward.gradient.z = 32;
87+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.coefficient.x = 8;
88+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.coefficient.y = 4;
89+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.coefficient.z = 32;
90+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.gradient.x = 4;
91+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.gradient.y = 8;
92+
EXTRA(GPU(convnet)->device[0].layers + 8)->vary.convolutional.backward.gradient.z = 32;
10893
float* a = 0;
10994
cudaMalloc(&a, sizeof(float) * 1000 * batch);
11095
cudaMemcpy(a, GPU(convnet)->device[0].forwards[convnet->count - 1], sizeof(float) * 1000 * batch, cudaMemcpyDeviceToDevice);
@@ -120,7 +105,7 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
120105
if (context->device[0].dor[i])
121106
{
122107
int out_rows, out_cols, out_partition;
123-
_ccv_convnet_layer_derive_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
108+
ccv_convnet_make_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
124109
_cwc_kern_mute_neuron
125110
<<<out_rows * out_cols * layer->net.convolutional.count, batch, 0, context->device[0].data_stream>>>
126111
(i == convnet->count - 1 ? a : GPU(convnet)->device[0].backwards[i + 1], context->device[0].dor[i]);
@@ -153,7 +138,7 @@ extern "C" void cwc_bench_runtime(ccv_convnet_t* convnet, ccv_array_t* categoriz
153138
cudaEventSynchronize(stop);
154139
cudaEventElapsedTime(&elapsed_time, start, stop);
155140
if (layer->type == CCV_CONVNET_CONVOLUTIONAL)
156-
printf("%d %d %d, %d %d %d, elapsed time for layer %d bprop: %f milliseconds\n", VARY(layer)->convolutional.backward.coefficient.x, VARY(layer)->convolutional.backward.coefficient.y, VARY(layer)->convolutional.backward.coefficient.z, VARY(layer)->convolutional.backward.gradient.x, VARY(layer)->convolutional.backward.gradient.y, VARY(layer)->convolutional.backward.gradient.z, i + 1, elapsed_time);
141+
printf("%d %d %d, %d %d %d, elapsed time for layer %d bprop: %f milliseconds\n", EXTRA(layer)->vary.convolutional.backward.coefficient.x, EXTRA(layer)->vary.convolutional.backward.coefficient.y, EXTRA(layer)->vary.convolutional.backward.coefficient.z, EXTRA(layer)->vary.convolutional.backward.gradient.x, EXTRA(layer)->vary.convolutional.backward.gradient.y, EXTRA(layer)->vary.convolutional.backward.gradient.z, i + 1, elapsed_time);
157142
else
158143
printf("elapsed time for layer %d bprop: %f milliseconds\n", i + 1, elapsed_time);
159144
}

ccv/cwc-bench.c

+9 -8
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ int main(int argc, char** argv)
3131
{
3232
.type = CCV_CONVNET_CONVOLUTIONAL,
3333
.bias = 0,
34-
.sigma = 0.01,
34+
.glorot = sqrtf(2),
3535
.input = {
3636
.matrix = {
3737
.rows = 128,
@@ -56,7 +56,7 @@ int main(int argc, char** argv)
5656
{
5757
.type = CCV_CONVNET_CONVOLUTIONAL,
5858
.bias = 1,
59-
.sigma = 0.01,
59+
.glorot = sqrtf(2),
6060
.input = {
6161
.matrix = {
6262
.rows = 64,
@@ -81,12 +81,12 @@ int main(int argc, char** argv)
8181
{
8282
.type = CCV_CONVNET_CONVOLUTIONAL,
8383
.bias = 0,
84-
.sigma = 0.01,
84+
.glorot = sqrtf(2),
8585
.input = {
8686
.matrix = {
87-
.rows = 32,
88-
.cols = 32,
89-
.channels = 128,
87+
.rows = 13,
88+
.cols = 13,
89+
.channels = 256,
9090
.partition = 1,
9191
},
9292
},
@@ -106,7 +106,7 @@ int main(int argc, char** argv)
106106
{
107107
.type = CCV_CONVNET_CONVOLUTIONAL,
108108
.bias = 1,
109-
.sigma = 0.01,
109+
.glorot = sqrtf(2),
110110
.input = {
111111
.matrix = {
112112
.rows = 16,
@@ -131,7 +131,7 @@ int main(int argc, char** argv)
131131
{
132132
.type = CCV_CONVNET_CONVOLUTIONAL,
133133
.bias = 1,
134-
.sigma = 0.01,
134+
.glorot = sqrtf(2),
135135
.input = {
136136
.matrix = {
137137
.rows = 13,
@@ -169,6 +169,7 @@ int main(int argc, char** argv)
169169
ccv_convnet_train_param_t train_params = {
170170
.max_epoch = 100,
171171
.mini_batch = 128,
172+
.device_count = 1,
172173
.layer_params = layer_params,
173174
};
174175
for (i = 0; i < 128; i++)

0 commit comments

Comments
 (0)