CNTK v2 library: Sequence to sequence python example
amitaga committed Sep 6, 2016
1 parent fa12c5e commit 8cff694
Showing 4 changed files with 228 additions and 109 deletions.
12 changes: 8 additions & 4 deletions bindings/python/cntk/ops/__init__.py
@@ -4,6 +4,7 @@
# ==============================================================================

import numpy as np
+from . import sequence
from ..utils import sanitize_input, sanitize_shape, get_data_type

def combine(operands, name=''):
@@ -874,8 +875,7 @@ def reciprocal(x, name=''):
    x = sanitize_input(x)
    return reciprocal(x, name).output()

-#TODO: enable when it is exposed in c++
-def cond(flag, value_if_true, value_if_false, name=''):
+def element_select(flag, value_if_true, value_if_false, name=''):
    '''
    Returns either value_if_true or value_if_false based on the value of flag.
    If flag != 0, value_if_true is returned; otherwise, value_if_false.
@@ -894,7 +894,11 @@ def cond(flag, value_if_true, value_if_false, name=''):
    Returns:
        :class:`cntk.Function`
    '''
-    raise NotImplementedError("cond is not implemented yet in V2")
+    from cntk import element_select
+    flag = sanitize_input(flag)
+    value_if_true = sanitize_input(value_if_true)
+    value_if_false = sanitize_input(value_if_false)
+    return element_select(flag, value_if_true, value_if_false, name).output()
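A minimal usage sketch of the new element_select wrapper (illustrative, not part of the commit; the input variables here are assumptions):

    from cntk.ops import input_variable, element_select
    flag = input_variable(shape=(1,))
    a = input_variable(shape=(1,))
    b = input_variable(shape=(1,))
    z = element_select(flag, a, b)  # elementwise: a where flag != 0, else b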

################################################################################
# recurrent ops
@@ -1232,7 +1236,7 @@ def placeholder_variable(shape, dynamic_axes = [Axis.default_dynamic_axis(), Axi
    '''
    from cntk import placeholder_variable
    shape = sanitize_shape(shape)
-    return placeholder_variable(shape)
+    return placeholder_variable(shape, dynamic_axes)

def parameter(shape=None, value=None, device=None, name=''):
    '''
144 changes: 144 additions & 0 deletions bindings/python/cntk/ops/sequence/__init__.py
@@ -0,0 +1,144 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import numpy as np
from ...utils import sanitize_input, sanitize_shape, get_data_type

################################################################################
# sequence ops
################################################################################
def is_first(operand, name = ''):
    '''
    Returns a symbolic sequence that is 1 at the first step of the operand sequence and 0 at every other step.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import is_first
    operand = sanitize_input(operand, get_data_type(operand))
    return is_first(operand, name).output()

def is_last(operand, name = ''):
    '''
    Returns a symbolic sequence that is 1 at the last step of the operand sequence and 0 at every other step.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import is_last
    operand = sanitize_input(operand, get_data_type(operand))
    return is_last(operand, name).output()
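A minimal usage sketch for is_first and is_last (illustrative, not part of the commit; x is an assumed sequence input):

    from cntk.ops import input_variable, sequence
    x = input_variable(shape=(10,))  # a sequence input over the default dynamic axes
    starts = sequence.is_first(x)    # 1 at the first step of each sequence, 0 elsewhere
    ends = sequence.is_last(x)       # 1 at the last step of each sequence, 0 elsewhere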

def first(operand, name = ''):
    '''
    Returns the first element of the operand sequence.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import first
    operand = sanitize_input(operand, get_data_type(operand))
    return first(operand, name).output()

def last(operand, name = ''):
    '''
    Returns the last element of the operand sequence.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import last
    operand = sanitize_input(operand, get_data_type(operand))
    return last(operand, name).output()
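first and last extract a single element per sequence; the Sequence2Sequence example below uses first to form the encoder's thought vector. A sketch using that example's names (encoder_outputH is assumed to be a sequence):

    thought_vectorH = sequence.first(encoder_outputH)  # the first step of the encoder output
    final_stepH = sequence.last(encoder_outputH)       # the last step, e.g. as a sequence summary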

def where(condition, name = ''):
    '''
    Given a symbolic sequence of boolean flags, returns a new sequence containing the indices of the steps at which the flag is non-zero.
    Example:
        TBA
    Args:
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import where
    condition = sanitize_input(condition, get_data_type(condition))
    return where(condition, name).output()
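A sketch for where (illustrative, not part of the commit): applied to a sequence of 0/1 flags, it yields the positions of the non-zero steps.

    ends = sequence.is_last(x)        # 0/1 flag per step of the sequence x
    positions = sequence.where(ends)  # the indices of the steps whose flag is non-zero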

def gather(operand, condition, name = ''):
    '''
    Returns a new (generally shorter) sequence containing only the steps of the operand sequence at which the condition flag is non-zero.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import gather
    operand = sanitize_input(operand, get_data_type(operand))
    condition = sanitize_input(condition, get_data_type(condition))
    return gather(operand, condition, name).output()

def scatter(operand, condition, name = ''):
    '''
    The inverse of gather: places the steps of the operand sequence at the positions of the condition layout at which the flag is non-zero, and fills all other steps with 0.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a sequence
        condition: the symbolic tensor operand denoting a boolean condition flag for each step of a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import scatter
    operand = sanitize_input(operand, get_data_type(operand))
    condition = sanitize_input(condition, get_data_type(condition))
    return scatter(operand, condition, name).output()
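gather and scatter are duals: gather compresses a sequence down to its flagged steps, while scatter spreads values back out over a flagged layout. A sketch mirroring the Sequence2Sequence example below (raw_labels and label_sequence as defined there):

    label_sentence_start = sequence.first(raw_labels)
    is_first_label = sequence.is_first(label_sequence)
    # place each start token at the first step of the label layout, zeros elsewhere
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)
    # conversely, keep only the flagged steps of a sequence
    starts_only = sequence.gather(raw_labels, sequence.is_first(raw_labels))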

def broadcast_as(operand, broadcast_as_operand, name = ''):
    '''
    Broadcasts the operand along the dynamic axes (sequence layout) of broadcast_as_operand.
    Example:
        TBA
    Args:
        operand: the symbolic tensor operand denoting a tensor
        broadcast_as_operand: the symbolic tensor operand denoting a sequence per whose layout the main operand is to be broadcast
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import broadcast_as
    operand = sanitize_input(operand, get_data_type(operand))
    broadcast_as_operand = sanitize_input(broadcast_as_operand, get_data_type(broadcast_as_operand))
    return broadcast_as(operand, broadcast_as_operand, name).output()
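broadcast_as repeats a per-sequence value at every step of another operand's sequence layout; the Sequence2Sequence example below uses it to make the encoder's thought vector available at every decoder step (sketch, names as in that example):

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)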
160 changes: 64 additions & 96 deletions bindings/python/examples/Sequence2Sequence/Sequence2Sequence.py
@@ -7,10 +7,11 @@
import numpy as np
import sys
import os
+import math
import time
-from cntk import learning_rates_per_sample, DeviceDescriptor, Trainer, sgdlearner, Axis, get_train_loss, get_train_eval_criterion
-from cntk.ops import variable, cross_entropy_with_softmax, classification_error
-from examples.common.nn import LSTMP_component_with_self_stabilization, embedding, fully_connected_linear_layer, select_last
+from cntk import learning_rates_per_sample, momentums_per_sample, DeviceDescriptor, Trainer, momentum_sgdlearner, Axis, text_format_minibatch_source, StreamConfiguration, print_training_progress
+from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, sequence, slice, past_value, future_value, element_select
+from examples.common.nn import LSTMP_component_with_self_stabilization, stabilize, linear_layer

# Creates and trains a sequence to sequence translation model
def train_sequence_to_sequence_translator():
@@ -28,124 +29,91 @@ def train_sequence_to_sequence_translator():
    label_dynamic_axes = [ Axis('labelAxis'), Axis.default_batch_axis() ]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes = label_dynamic_axes)

+    # Instantiate the sequence to sequence translation model
+    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
-    label_sequence = cntk.ops.slice(raw_labels, label_dynamic_axes[0], 1, 0)
-    label_sentence_start = Sequence.first(raw_labels)
+    label_sequence = slice(raw_labels, label_dynamic_axes[0], 1, 0)
+    label_sentence_start = sequence.first(raw_labels)

-    is_first_label = Sequence.is_first(label_sequence)
-
-    label_sentence_start_scattered = Sequence.scatter(label_sentence_start, is_first_label)
+    is_first_label = sequence.is_first(label_sequence)
+    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

-    # Encoder
-    encoderOutputH = stabilize<float>(inputEmbedding, device)
-    futureValueRecurrenceHook = [](const Variable& x) { return FutureValue(x) }
-    for (size_t i = 0 i < num_layers ++i)
-        std::tie(encoderOutputH, encoderOutputC) = LSTMPComponentWithSelfStabilization<float>(encoderOutputH, hidden_dim, hidden_dim, futureValueRecurrenceHook, futureValueRecurrenceHook, device)
-
-    thoughtVectorH = Sequence::First(encoderOutputH)
-    thoughtVectorC = Sequence::First(encoderOutputC)
-
-    thoughtVectorBroadcastH = Sequence::BroadcastAs(thoughtVectorH, labelEmbedding)
-    thoughtVectorBroadcastC = Sequence::BroadcastAs(thoughtVectorC, labelEmbedding)
-
-    /* Decoder */
-    bool addBeamSearchReorderingHook = false
-    beamSearchReorderHook = Constant({ 1, 1 }, 1.0f)
-    decoderHistoryFromGroundTruth = labelEmbedding
-    decoderInput = ElementSelect(is_first_label, label_sentence_startEmbeddedScattered, PastValue(decoderHistoryFromGroundTruth))
-
-    decoderOutputH = Stabilize<float>(decoderInput, device)
-    FunctionPtr decoderOutputC
-    pastValueRecurrenceHookWithBeamSearchReordering = [addBeamSearchReorderingHook, beamSearchReorderHook](const FunctionPtr& operand) {
-        return PastValue(addBeamSearchReorderingHook ? Times(operand, beamSearchReorderHook) : operand)
-    }
-
-    for (size_t i = 0 i < num_layers ++i)
-    {
-        std::function<FunctionPtr(const Variable&)> recurrenceHookH, recurrenceHookC
-        if (i == 0)
-        {
-            recurrenceHookH = pastValueRecurrenceHookWithBeamSearchReordering
-            recurrenceHookC = pastValueRecurrenceHookWithBeamSearchReordering
-        }
-        else
-        {
-            isFirst = Sequence::IsFirst(labelEmbedding)
-            recurrenceHookH = [labelEmbedding, thoughtVectorBroadcastH, isFirst, addBeamSearchReorderingHook, beamSearchReorderHook](const FunctionPtr& operand) {
-                return ElementSelect(isFirst, thoughtVectorBroadcastH, PastValue(addBeamSearchReorderingHook ? Times(operand, beamSearchReorderHook) : operand))
-            }
-
-            recurrenceHookC = [labelEmbedding, thoughtVectorBroadcastC, isFirst, addBeamSearchReorderingHook, beamSearchReorderHook](const FunctionPtr& operand) {
-                return ElementSelect(isFirst, thoughtVectorBroadcastC, PastValue(addBeamSearchReorderingHook ? Times(operand, beamSearchReorderHook) : operand))
-            }
-        }
-
-        std::tie(decoderOutputH, encoderOutputC) = LSTMPComponentWithSelfStabilization<float>(decoderOutputH, hidden_dim, hidden_dim, recurrenceHookH, recurrenceHookC, device)
-    }
-
-    decoderOutput = decoderOutputH
-    decoderDim = hidden_dim
-
-    /* Softmax output layer */
-    outputLayerProjWeights = Parameter(NDArrayView::RandomUniform<float>({ label_vocab_dim, decoderDim }, -0.05, 0.05, 1, device))
-    biasWeights = Parameter({ label_vocab_dim }, 0.0f, device)
-
-    z = Plus(Times(outputLayerProjWeights, Stabilize<float>(decoderOutput, device)), biasWeights, L"classifierOutput")
-    ce = CrossEntropyWithSoftmax(z, label_sequence, L"lossFunction")
-    errs = ClassificationError(z, label_sequence, L"classificationError")
-
-    input_dim = 2000
-    cell_dim = 25
-    hidden_dim = 25
-    embedding_dim = 50
-    num_output_classes = 5
-
-    # Input variables denoting the features and label data
-    features = variable(shape=input_dim, is_sparse=True, name="features")
-    label = variable(num_output_classes, dynamic_axes = [Axis.default_batch_axis()], name="labels")
-
-    # Instantiate the sequence classification model
-    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim)
-
-    ce = cross_entropy_with_softmax(classifier_output, label)
-    pe = classification_error(classifier_output, label)
-
-    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
+    encoder_outputH = stabilize(input_sequence)
+    for i in range(0, num_layers):
+        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(encoder_outputH, hidden_dim, hidden_dim, future_value, future_value)
+
+    thought_vectorH = sequence.first(encoder_outputH)
+    thought_vectorC = sequence.first(encoder_outputC)
+
+    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
+    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)
+
+    decoder_history_from_ground_truth = label_sequence
+    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth))
+
+    decoder_outputH = stabilize(decoder_input)
+    for i in range(0, num_layers):
+        if (i == 0):
+            recurrence_hookH = past_value
+            recurrence_hookC = past_value
+        else:
+            isFirst = sequence.is_first(label_sequence)
+            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
+            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))
+
+        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(decoder_outputH, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)
+
+    decoder_output = decoder_outputH
+    decoder_dim = hidden_dim
+
+    # Softmax output layer
+    z = linear_layer(stabilize(decoder_output), label_vocab_dim)
+    ce = cross_entropy_with_softmax(z, label_sequence)
+    errs = classification_error(z, label_sequence)
+
+    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
+    feature_stream_name = 'features'
+    labels_stream_name = 'labels'

-    mb_source = text_minibatch_source(path, [ ( 'features', input_dim, True, 'x' ), ( 'labels', num_output_classes, False, 'y' ) ], 0)
-    features_si = mb_source.stream_info(features)
-    labels_si = mb_source.stream_info(label)
+    mb_source = text_format_minibatch_source(path, list([
+        StreamConfiguration( feature_stream_name, input_vocab_dim, True, 'S0' ),
+        StreamConfiguration( labels_stream_name, label_vocab_dim, True, 'S1') ]), 10000)
+    features_si = mb_source.stream_info(feature_stream_name)
+    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
-    lr = lr = learning_rates_per_sample(0.0005)
-    trainer = Trainer(classifier_output, ce, pe, [sgdlearner(classifier_output.owner.parameters(), lr)])
+    lr = learning_rates_per_sample(0.007)
+    momentum_time_constant = 1100
+    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
+    clipping_threshold_per_sample = 2.3
+    gradient_clipping_with_truncation = True
+
+    trainer = Trainer(z, ce, errs, [momentum_sgdlearner(z.owner.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation)])

    # Get minibatches of sequences to train with and perform model training
-    minibatch_size = 200
+    minibatch_size = 72
    training_progress_output_freq = 1
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
-        arguments = {features : mb[features_si].m_data, label : mb[labels_si].m_data}
+        arguments = {raw_input : mb[features_si].m_data, raw_labels : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

-        print_training_progress(training_progress_output_freq, i, trainer)
+        print_training_progress(i, trainer, training_progress_output_freq)

        i += 1

if __name__=='__main__':

    #time.sleep(10)
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

-    train_sequence_classifier()
+    train_sequence_to_sequence_translator()
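The subtlest part of the new example is the decoder recurrence hook: for every layer above the first, the recurrence is seeded with the encoder's thought vector at the first step of each sequence, and with the decoder's own previous output afterwards. Restated as a commented sketch (mirroring the code above, not adding to it):

    # at the first step of each sequence, feed the broadcast thought vector;
    # at every later step, feed the decoder's previous output
    recurrence_hookH = lambda operand: element_select(
        sequence.is_first(label_sequence),  # 1 at the first step, 0 elsewhere
        thought_vector_broadcastH,          # encoder summary, broadcast along the labels
        past_value(operand))                # the previous decoder output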