
Commit

learning rate argument
saransh-mehta committed Jun 18, 2020
1 parent 0046faa commit c26326c
Showing 4 changed files with 15 additions and 2 deletions.
2 changes: 2 additions & 0 deletions infer_pipeline.py
@@ -62,6 +62,8 @@ def __init__(self, modelPath, maxSeqLen = 128):
         # dummy values
         allParams['num_train_steps'] = 10
         allParams['warmup_steps'] = 0
+        allParams['learning_rate'] = 2e-5
+        allParams['epsilon'] = 1e-8
 
         #making and loading model
         self.model = multiTaskModel(allParams)
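
Because make_optimizer no longer has defaults for lr and eps (see models/model.py below), the inference pipeline now has to supply learning_rate and epsilon even though the optimizer is never stepped at inference time; the values above are placeholders. A minimal sketch of the resulting setup, assuming the task and model configuration keys set earlier in infer_pipeline.py are already in allParams:

    # Hedged sketch: only the keys touched by this commit are shown; the other
    # configuration keys assumed to be set earlier are omitted.
    allParams['num_train_steps'] = 10   # dummy: the optimizer is never stepped at inference
    allParams['warmup_steps'] = 0       # dummy
    allParams['learning_rate'] = 2e-5   # dummy, but now required because make_optimizer
    allParams['epsilon'] = 1e-8         # no longer has defaults for lr and eps
    model = multiTaskModel(allParams)   # __init__ builds the (unused) optimizer from these
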
6 changes: 4 additions & 2 deletions models/model.py
@@ -153,12 +153,14 @@ def __init__(self, params):
         #print(self.network.state_dict().keys())
         #optimizer and scheduler
         self.optimizer, self.scheduler = self.make_optimizer(numTrainSteps=self.params['num_train_steps'],
-                                                              warmupSteps=self.params['warmup_steps'])
+                                                              warmupSteps=self.params['warmup_steps'],
+                                                              lr = self.params["learning_rate"],
+                                                              eps = self.params["epsilon"])
         #loss class list
         self.lossClassList = self.make_loss_list()
 
 
-    def make_optimizer(self, numTrainSteps, lr = 2e-5, eps = 1e-8, warmupSteps=0):
+    def make_optimizer(self, numTrainSteps, lr, eps, warmupSteps=0):
         # we will use the AdamW optimizer from huggingface transformers. This optimizer is
         # widely used with BERT. It is a modified form of Adam which is used in Tensorflow
         # implementations
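
The signature change drops the hard-coded defaults, so lr and eps must now always be passed in from params. The body of make_optimizer is not part of this diff; based on the comment above, a minimal sketch of what it presumably does with these arguments, using AdamW and a linear warmup schedule from huggingface transformers (the choice of scheduler here is an assumption):

    from transformers import AdamW, get_linear_schedule_with_warmup

    def make_optimizer(self, numTrainSteps, lr, eps, warmupSteps=0):
        # AdamW from huggingface transformers, as the comment describes;
        # lr and eps now come from params instead of hard-coded defaults.
        optimizer = AdamW(self.network.parameters(), lr=lr, eps=eps)
        # a linear warmup/decay schedule driven by the step counts passed in
        # (the exact scheduler used in the repo is not visible in this diff)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=warmupSteps,
                                                    num_training_steps=numTrainSteps)
        return optimizer, scheduler
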
2 changes: 2 additions & 0 deletions run_inference.py
@@ -93,6 +93,8 @@ def main():
     # dummy values
     allParams['num_train_steps'] = 10
     allParams['warmup_steps'] = 0
+    allParams['learning_rate'] = 2e-5
+    allParams['epsilon'] = 1e-8
 
     #making and loading model
     model = multiTaskModel(allParams)
7 changes: 7 additions & 0 deletions train.py
@@ -39,6 +39,10 @@ def make_arguments(parser):
                         help = "number of steps to accumulate gradients before update")
     parser.add_argument('--num_of_warmup_steps', type=int, default = 0,
                         help = "warm-up value for scheduler")
+    parser.add_argument('--learning_rate', type=float, default=2e-5,
+                        help = "learning rate for optimizer")
+    parser.add_argument('--epsilon', type=float, default=1e-8,
+                        help="epsilon value for optimizer")
     parser.add_argument('--grad_clip_value', type = float, default=1.0,
                         help = "gradient clipping value to avoid gradient overflowing" )
     parser.add_argument('--log_file', default='multi_task_logs.log', type = str,
@@ -215,6 +219,9 @@ def main():
     #making multi-task model
     allParams['num_train_steps'] = math.ceil(len(multiTaskDataLoaderTrain)/args.train_batch_size) *args.epochs // args.grad_accumulation_steps
     allParams['warmup_steps'] = args.num_of_warmup_steps
+    allParams['learning_rate'] = args.learning_rate
+    allParams['epsilon'] = args.epsilon
+
     logger.info("NUM TRAIN STEPS: {}".format(allParams['num_train_steps']))
     logger.info("len of dataloader: {}".format(len(multiTaskDataLoaderTrain)))
     logger.info("Making multi-task model...")
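
Both new flags default to the values that were previously hard-coded in make_optimizer (2e-5 and 1e-8), so existing training commands behave the same unless the flags are set explicitly. A self-contained sketch of how the flags flow into allParams, with made-up numbers for the step arithmetic (the dataloader length, batch size, epoch count, and accumulation steps below are assumptions, not values from the repo):

    import argparse
    import math

    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=2e-5,
                        help="learning rate for optimizer")
    parser.add_argument('--epsilon', type=float, default=1e-8,
                        help="epsilon value for optimizer")
    args = parser.parse_args(['--learning_rate', '3e-5'])  # override only the learning rate

    allParams = {}
    # worked example of the num_train_steps formula with assumed values:
    # len(multiTaskDataLoaderTrain)=1000, train_batch_size=8, epochs=3,
    # grad_accumulation_steps=2  ->  ceil(1000/8) * 3 // 2 = 187
    allParams['num_train_steps'] = math.ceil(1000 / 8) * 3 // 2
    allParams['warmup_steps'] = 0
    allParams['learning_rate'] = args.learning_rate   # 3e-5 from the command line
    allParams['epsilon'] = args.epsilon               # falls back to the default 1e-8
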
