Skip to content

Commit

Permalink
ClassBasedCrossEntropyWithSoftmaxNode now taking advantage of optimiz…
Browse files Browse the repository at this point in the history
…ed CPU-side element access
  • Loading branch information
frankseide committed Mar 4, 2016
1 parent 50f6cd6 commit e0be5a1
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 14 deletions.
17 changes: 6 additions & 11 deletions Source/ComputationNetworkLib/TrainingNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -712,12 +712,8 @@ template class NoiseContrastiveEstimationNode<double>;
template <class ElemType>
class ClassBasedCrossEntropyWithSoftmaxNode : public ComputationNodeNonLooping /*ComputationNode*/<ElemType>, public NumInputs<4>
{
typedef ComputationNodeNonLooping<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"ClassBasedCrossEntropyWithSoftmax";
}
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ClassBasedCrossEntropyWithSoftmax"; }

// our inputs
static const size_t LABELDATA = 0;
Expand Down Expand Up @@ -850,9 +846,8 @@ class ClassBasedCrossEntropyWithSoftmaxNode : public ComputationNodeNonLooping /
// -sum(left_i * log(softmax_i(right)))
virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override
{
if (Input(LABELDATA)->Value().GetDeviceId() != CPUDEVICE)
LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): The label matrix is not using CPU device. This will make computation slow, even though the label data is probably saved on GPU. Because of the external loop over time with explicit class id retrieved from the label matrix, the computation will be very slow if the label matrix is saved on GPU. However, this is only a constraint for label matrix and other matrices such as data are suggested to reside on GPU. ");
// TODO: Get the label matrix into location=Both state.
// get the label matrix to CPU, ideally in location=BOTH state
Input(LABELDATA)->Value().TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ false/*means: BOTH state OK*/, /*emptyTransfer =*/ false, /*updatePreferredDevice =*/ false);

auto& functionValues = Value();

Expand All @@ -868,11 +863,11 @@ class ClassBasedCrossEntropyWithSoftmaxNode : public ComputationNodeNonLooping /
m_totalNbrWords = ForColumnsWithClass([](size_t /*s*/, size_t /*t*/, const FrameRange& /*fr*/, size_t y_t, size_t /*c_t*/, size_t /*sz*/, size_t lft_bnd, size_t nbr_wrd)
{
if (nbr_wrd == 0)
LogicError("ClassBasedCrossEntropyWithSoftmax: Encountered a class of size 0. This sample seems to lack an NoInput flag.");
LogicError("ClassBasedCrossEntropyWithSoftmax: Encountered a class of size 0.");
if (y_t < lft_bnd || y_t >= lft_bnd + nbr_wrd)
LogicError("ClassBasedCrossEntropyWithSoftmax: Word index out of bounds of class-member index range (word not a class member).");
});
// m_totalNbrWords = total size of concatenated vector
// now m_totalNbrWords = total size of concatenated vector

// buffer to hold the concatenated class-conditioned prob vectors
m_softMax.Resize(1, m_totalNbrWords);
Expand Down
6 changes: 3 additions & 3 deletions Source/Math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType
m_baseMatrix = ((m_currentDataLocation == CurrentDataLocation::CPU) ? (BaseMatrix<ElemType>*) m_CPUSparseMatrix : (BaseMatrix<ElemType>*) m_GPUSparseMatrix);

// sanity check
if (!m_baseMatrix)
if (!m_baseMatrix && m_matrixType != MatrixType::UNDETERMINED)
LogicError("SetDataLocation: new m_baseMatrix must not be NULL.");
}

Expand Down Expand Up @@ -777,7 +777,7 @@ void Matrix<ElemType>::CopySection(size_t numRows, size_t numCols, ElemType* dst
template <class ElemType>
Matrix<ElemType> Matrix<ElemType>::ColumnSlice(size_t startColumn, size_t numCols) const
{
int devId = GetDeviceId(); // BUGBUG: Must work off current location
int devId = GetDeviceId();

Matrix<ElemType> slice(matrixFlagDontOwnBuffer, (DEVICEID_TYPE) devId); // this already creates pointers

Expand Down Expand Up @@ -1075,7 +1075,7 @@ const ElemType Matrix<ElemType>::operator()(const size_t row, const size_t col)
// WARNING: This function is very slow for GPUs since it requires copying values between CPUs and GPUs.
// In addition, if ColumnSlice is used after this function but before the values are copied back to GPU
// the operation will fail since the memory is not managed by the slice.
// If you don't need to modify the values, please make sure to call the const version above.
// If you don't need to modify the values, please make sure to call the const version above, or GetValue(row,col), which does that for you unambiguously.
// TODO: Can we remove this, and have users use SetValue() instead? To avoid this potential error?
template <class ElemType>
ElemType& Matrix<ElemType>::operator()(const size_t row, const size_t col)
Expand Down

0 comments on commit e0be5a1

Please sign in to comment.