explain use of scratch diffs in comments
a few layers make use of otherwise unused diffs to accumulate results,
but unless the diffs are cleared in forward, the leftover scratch values
contaminate the gradients when these layers share a bottom and their
backward is skipped.
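
In outline, the scratch-diff pattern these layers use in Forward_gpu is sketched
below (illustrative only, not verbatim from any one layer; scratch and result are
placeholder names, and the layer-specific accumulation kernel is elided):

  // Borrow the otherwise unused bottom diff as temporary GPU storage.
  Dtype* scratch = bottom[0]->mutable_gpu_diff();
  // ... launch kernels that accumulate per-item intermediate results into scratch ...
  Dtype result;
  // Reduce the accumulated scratch values into a single number.
  caffe_gpu_asum(bottom[0]->count(), scratch, &result);
  // Zero the borrowed diff before Forward returns, so a skipped Backward cannot
  // leave these values to be summed into the gradient of a shared bottom.
  caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());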
shelhamer committed Jan 29, 2018
1 parent 12a7038 commit 4116590
Showing 3 changed files with 9 additions and 9 deletions.
6 changes: 3 additions & 3 deletions src/caffe/layers/accuracy_layer.cu
@@ -71,9 +71,8 @@ void AccuracyLayer<Dtype>::Forward_gpu(
const int dim = bottom[0]->count() / outer_num_;
const int num_labels = bottom[0]->shape(label_axis_);
const int nthreads = outer_num_ * inner_num_;
- // Since this memory is not used for anything,
- // we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // Since this memory is not used for anything, we use it here to avoid having
+ // to allocate new GPU memory to accumulate intermediate results.
Dtype* acc_data = bottom[0]->mutable_gpu_diff();
if (top.size() == 1) {
// simple case - report only global accuracy.
@@ -134,6 +133,7 @@ void AccuracyLayer<Dtype>::Forward_gpu(
}
}
}
+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
}

6 changes: 3 additions & 3 deletions src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
@@ -48,9 +48,8 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
// Stable version of loss computation from input data
const Dtype* input_data = bottom[0]->gpu_data();
const Dtype* target = bottom[1]->gpu_data();
- // Since this memory is not used for anything until it is overwritten
- // on the backward pass, we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // Since this memory is not used for anything, we use it here to avoid having
+ // to allocate new GPU memory to accumulate intermediate results.
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
Dtype* count_data = bottom[1]->mutable_gpu_diff();
Dtype valid_count;
@@ -70,6 +69,7 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
normalizer_ = get_normalizer(normalization_, valid_count);
top[0]->mutable_cpu_data()[0] = loss / normalizer_;

+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
caffe_gpu_set(bottom[1]->count(), Dtype(0), bottom[1]->mutable_gpu_diff());
}
6 changes: 3 additions & 3 deletions src/caffe/layers/softmax_loss_layer.cu
@@ -36,9 +36,8 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
const Dtype* label = bottom[1]->gpu_data();
const int dim = prob_.count() / outer_num_;
const int nthreads = outer_num_ * inner_num_;
- // Since this memory is not used for anything until it is overwritten
- // on the backward pass, we use it here to avoid having to allocate new GPU
- // memory to accumulate intermediate results in the kernel.
+ // Since this memory is not used for anything, we use it here to avoid having
+ // to allocate new GPU memory to accumulate intermediate results.
Dtype* loss_data = bottom[0]->mutable_gpu_diff();
// Similarly, this memory is never used elsewhere, and thus we can use it
// to avoid having to allocate additional GPU memory.
@@ -62,6 +61,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
top[1]->ShareData(prob_);
}

+ // Clear scratch memory to prevent interfering with backward (see #6202).
caffe_gpu_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_gpu_diff());
}

