LearnableParameter::InitRandom() now mimics the Matrix initialization behavior;

bug fix: LookupTableNode used GetAsMatrixNumRows() on input[1], which is a minibatch;
bug fix: Image/QuickE2E network definition updated to drop the now unnecessary extra column dimension;
bug fix: TensorShape::IsDense() should not require m_offset to be 0 (column slices are perfectly fine)
frankseide committed Jan 21, 2016
1 parent d44c9ef commit e389459
Showing 5 changed files with 23 additions and 19 deletions.
2 changes: 0 additions & 2 deletions Source/Common/Include/TensorShape.h
@@ -388,8 +388,6 @@ struct TensorShape
     // verify that this refers to a dense matrix (no strides)
     void VerifyIsDense() const
     {
-        if (m_offset != 0)
-            LogicError("TensorShape: A dense TensorShape expected. Offset %d not allowed.", (int) m_offset);
         for (size_t k = 0; k < m_dims.size(); k++) // (TODO: we can save one multiplication here)
         {
             ptrdiff_t stride = k > 0 ? m_strides[k - 1] * (ptrdiff_t) m_dims[k - 1] : 1;
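The dropped offset check is the substance of the TensorShape fix in the commit message: a column slice into a larger matrix starts at a nonzero offset, yet its strides are still contiguous, so it is a perfectly good dense layout. A minimal standalone sketch of the relaxed invariant (illustrative code, not CNTK's actual TensorShape):

#include <cstddef>
#include <vector>

// A layout is dense when each dimension's stride equals the product of all
// faster-changing dimensions; where the data starts (the offset) no longer
// matters. E.g. column j of an M x N matrix has dims {M}, strides {1},
// offset j * M: dense under this definition, rejected by the old check.
bool IsDenseLayout(const std::vector<std::size_t>& dims,
                   const std::vector<std::ptrdiff_t>& strides)
{
    std::ptrdiff_t expected = 1; // stride a dense dimension k must have
    for (std::size_t k = 0; k < dims.size(); k++)
    {
        if (strides[k] != expected)
            return false;
        expected *= (std::ptrdiff_t)dims[k];
    }
    return true;
}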
2 changes: 1 addition & 1 deletion Source/ComputationNetworkLib/ComputationNode.h
@@ -393,7 +393,7 @@ class ComputationNodeBase : public IComputationNode,
         if (HasMBLayout())
             LogicError("CheckTensorIsMatrix: Minibatch data cannot be interpreted as a single 2D tensor.");
         else if (m_sampleLayout.GetRank() < 1 || m_sampleLayout.GetRank() > 2) // note: scalars are not stored as tensors of rank 0, but rather as 1-dim vectors. TODO: clean this up some day
-            LogicError("CheckTensorIsMatrix: Sample is now a 2D tensor.");
+            LogicError("CheckTensorIsMatrix: Sample is not a column vector or matrix (1D or 2D tensor).");
     }
 public:
     size_t GetAsMatrixNumRows() const
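The new message matches the check's actual contract: a sample can be viewed as a matrix only if its tensor rank is 1 (a column vector, with an implicit column dimension of 1) or 2. A hedged sketch of that mapping as a free function (hypothetical helper, not the member itself):

#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// Rank 1 -> rows x 1 column vector; rank 2 -> rows x cols; anything else
// cannot be interpreted as a single 2D matrix.
std::pair<std::size_t, std::size_t> AsMatrixDims(const std::vector<std::size_t>& sampleDims)
{
    if (sampleDims.size() < 1 || sampleDims.size() > 2)
        throw std::logic_error("sample is not a column vector or matrix (1D or 2D tensor)");
    std::size_t rows = sampleDims[0];
    std::size_t cols = sampleDims.size() == 2 ? sampleDims[1] : 1;
    return std::make_pair(rows, cols);
}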
17 changes: 11 additions & 6 deletions Source/ComputationNetworkLib/InputAndParamNodes.h
@@ -130,21 +130,26 @@ class LearnableParameter : public ComputationNode<ElemType>, public NumInputs<0>

     // the random seed offset is set via the "randomSeedOffset" parameter in config
     if (initOnCPUOnly)
-        m_value->TransferToDeviceIfNotThereAndNotAutoPlace(CPUDEVICE, true);
+        Value().TransferToDeviceIfNotThereAndNotAutoPlace(CPUDEVICE, true);
+#if 1 // this more complex version is needed to repro test cases generated with an older version
+    auto value = GetSampleLayout().GetRank() > 2 ? Value() : ValueAsMatrix();
+#else
+    auto value = Value();
+#endif
     if (uniformInit)
     {
         // TODO: move these hidden extra factors out from here and into NDL, and make them visible in BS
         ElemType randRange = 0.05f * initValueScale;
-        Value().SetUniformRandomValue(-randRange, randRange, randomSeed);
+        value.SetUniformRandomValue(-randRange, randRange, randomSeed);
     }
     else
     {
         size_t inputSize = GetAsMatrixNumCols();
         ElemType randInitstd = 0.2f * initValueScale / sqrt(ElemType(inputSize));
-        Value().SetGaussianRandomValue(0, randInitstd, randomSeed);
+        value.SetGaussianRandomValue(0, randInitstd, randomSeed);
     }
     if (initOnCPUOnly)
-        m_value->TransferToDeviceIfNotThereAndNotAutoPlace(m_deviceId, true);
+        Value().TransferToDeviceIfNotThereAndNotAutoPlace(m_deviceId, true);
 }

 // initialize by reading a matrix from a text file
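Two things happen in this hunk. The #if 1 branch initializes rank-1/rank-2 parameters through their matrix view (ValueAsMatrix()) so the random values land in the same element order as with the older Matrix-based initializer, which keeps recorded test baselines reproducible. The scaling rules themselves are unchanged; a sketch of just those formulas (assumed semantics, hypothetical helper names):

#include <cmath>
#include <cstddef>

// Uniform init draws from U(-r, r) with r = 0.05 * initValueScale.
float UniformInitRange(float initValueScale)
{
    return 0.05f * initValueScale;
}

// Gaussian init uses stddev 0.2 * initValueScale / sqrt(fanIn), where fanIn
// is the parameter's column count, i.e. its input dimension.
float GaussianInitStd(float initValueScale, std::size_t fanIn)
{
    return 0.2f * initValueScale / std::sqrt((float)fanIn);
}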
@@ -492,10 +497,10 @@ class LookupTableNode : public ComputationNode<ElemType>, public NumInputs<2>

     if (isFinalValidationPass && !HasMBLayout())
         InvalidArgument("%ls %ls operation can only operate on minibatches.", NodeName().c_str(), OperationName().c_str());
-    if (isFinalValidationPass && Input(1)->GetAsMatrixNumRows() % Input(0)->GetAsMatrixNumCols() != 0)
+    if (isFinalValidationPass && Input(1)->GetSampleMatrixNumRows() % Input(0)->GetAsMatrixNumCols() != 0)
         InvalidArgument("Mismatched dimension. Rows in input1 must be multiples of cols in input0.");

-    int wordsInEachSample = Input(1)->GetAsMatrixNumRows() / Input(0)->GetAsMatrixNumCols();
+    size_t wordsInEachSample = Input(1)->GetSampleMatrixNumRows() / Input(0)->GetAsMatrixNumCols();

     // TODO: Should this add a tensor dimension?
     SetDims(TensorShape(Input(0)->GetSampleMatrixNumRows() * wordsInEachSample), true);
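This hunk is the LookupTableNode fix from the commit message: input 1 carries minibatch data, so GetAsMatrixNumRows() was the wrong query; the per-sample row count (GetSampleMatrixNumRows()) is what must divide evenly by the table's column count. A sketch of the fixed validation as a standalone function (hypothetical names):

#include <cstddef>
#include <stdexcept>

// input0 is the lookup table; each sample of input1 packs wordsPerSample
// consecutive blocks of input0Cols rows, so the sample row count must be an
// exact multiple of the table width.
std::size_t WordsPerSample(std::size_t input1SampleRows, std::size_t input0Cols)
{
    if (input1SampleRows % input0Cols != 0)
        throw std::invalid_argument("rows in input1 must be a multiple of cols in input0");
    return input1SampleRows / input0Cols;
}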
4 changes: 2 additions & 2 deletions Tests/EndToEndTests/Image/QuickE2E/cntk.config
@@ -33,8 +33,8 @@ train = [
     convW = Parameter(outMap, inWCount, init="uniform", initValueScale=wScale, initOnCPUOnly=false)
     conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding=false, imageLayout=if useCuDnn then "cudnn" else "legacy")
     convB = if useCuDnn
-            then ParameterTensor((1 : 1 : outMap : 1/*col dim*/), init="fixedValue", value=bValue)
-            else Parameter(outMap, 1, init="fixedValue", value=bValue)
+            then ParameterTensor((1 : 1 : outMap), init="fixedValue", value=bValue)
+            else Parameter(outMap, 1, init="fixedValue", value=bValue)
     convPlusB = Plus(conv, convB);
     out = RectifiedLinear(convPlusB);
 ]
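This is the network-definition fix from the commit message: with tensor-aware Plus, a bias of shape (1 : 1 : outMap) broadcasts over the convolution output's width and height, so the explicit trailing column dimension is no longer needed. A generic sketch of that broadcasting rule (illustrative only, not CNTK's implementation):

#include <cstddef>
#include <vector>

// Shapes broadcast when, dimension by dimension, the sizes are equal or one
// of them is 1; missing trailing dimensions act as 1. Hence {1, 1, outMap}
// matches a conv output {W, H, outMap}, with or without a trailing singleton.
bool CanBroadcast(const std::vector<std::size_t>& a, const std::vector<std::size_t>& b)
{
    std::size_t rank = a.size() > b.size() ? a.size() : b.size();
    for (std::size_t k = 0; k < rank; k++)
    {
        std::size_t da = k < a.size() ? a[k] : 1;
        std::size_t db = k < b.size() ? b[k] : 1;
        if (da != db && da != 1 && db != 1)
            return false;
    }
    return true;
}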
17 changes: 9 additions & 8 deletions Tests/EndToEndTests/Speech/LSTM/cntk.config
@@ -49,6 +49,7 @@ speechTrain = [
 ExperimentalNetworkBuilder=[

     WeightParam(m,n) = Parameter(m, n, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1)
+    #BiasParam(m) = ParameterTensor(m, init='fixedValue', value=0.0)
     BiasParam(m) = Parameter(m, 1, init='fixedValue', value=0.0)
     ScalarParam() = Parameter(1, 1, init='fixedValue', value=0.0)

@@ -70,27 +71,27 @@ speechTrain = [
     PastValueShift(dimDummy, input) = Shift(input, /*fromOffsets=*/-1, /*boundaryValue=*/Constant(0.1), dim=-1)
     PastValue1 = PastValue
     #PastValue1 = PastValueShift
     dh = PastValue1(outputDim, output);            // hidden state(t-1)
     dc = PastValue1(cellDim, ct);                  // cell(t-1)

     // note: the W(inputx) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
     it = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // input gate(t)
     bit = it .* Tanh(W(inputx) + (H(dh) + B()))   // applied to tanh of input network

     ft = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // forget-me-not gate(t)
     bft = ft .* dc                                // applied to cell(t-1)

     ct = bft + bit                                // c(t) is sum of both

     ot = Sigmoid(W(inputx) + B() + H(dh) + C(ct)) // output gate(t)
     mt = ot .* Tanh(ct)                           // applied to tanh(cell(t))

     output = Wmr * Stabilize(mt)                  // projection
 ]

 // define basic I/O
 baseFeatDim = 33
-featDim = 11 * baseFeatDim // TODO: 363--is this the correct explanation?
+featDim = 11 * baseFeatDim
 labelDim = 132

 // hidden dimensions
@@ -101,7 +102,7 @@ speechTrain = [
     // features
     features = Input(featDim, tag='feature')
     labels = Input(labelDim, tag='label')
-    feashift = RowSlice(featDim - baseFeatDim, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} ) // TODO why 5? Where do I see this?
+    feashift = RowSlice(featDim - baseFeatDim, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} ) // TODO why 5? Where do I see this? Seems to be the last frame!

     featNorm = MeanVarNorm(feashift)

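For orientation, the cell macro in the hunk above implements the standard peephole LSTM recurrence. In the script's notation, W(.), H(.), and C(.) are per-gate input, recurrent, and peephole projections and B() is the bias; written out as math (a reading of the script, not text from the commit):

\begin{aligned}
i_t &= \sigma(W_i x_t + H_i h_{t-1} + C_i c_{t-1} + b_i) \\
f_t &= \sigma(W_f x_t + H_f h_{t-1} + C_f c_{t-1} + b_f) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(W_c x_t + H_c h_{t-1} + b_c) \\
o_t &= \sigma(W_o x_t + H_o h_{t-1} + C_o c_t + b_o) \\
h_t &= o_t \odot \tanh(c_t), \qquad \text{output}_t = W_{mr}\,\mathrm{Stabilize}(h_t)
\end{aligned}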
