Skip to content

Commit

Permalink
modify train_rl to use mean_return as well as success_rate (mila-iqia#60)
Browse files Browse the repository at this point in the history

* modify train_rl to use mean_return as well as success_rate

* add bool save_model to reduce copy-paste
  • Loading branch information
dyth authored and maximecb committed Mar 28, 2019
1 parent 27923b9 commit 66bcf6c
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions scripts/train_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@

# Wall-clock timestamp taken just before the training loop starts.
total_start_time = time.time()
# Best success rate seen so far; exceeding it triggers saving the '_best' model.
best_success_rate = 0
# Best mean return seen so far; acts as the tie-breaker when the success rate
# equals the best one.
best_mean_return = 0
# Environment name taken from args.env (its use is not visible in this excerpt).
test_env_name = args.env
while status['num_frames'] < args.frames:
# Update parameters
Expand Down Expand Up @@ -233,8 +234,14 @@
# Presumably switches the model back to training mode after an evaluation
# pass -- confirm against the lines preceding this excerpt.
agent.model.train()
mean_return = np.mean(logs["return_per_episode"])
# An episode counts as a success when its return is strictly positive.
success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])

# Checkpoint selection: success rate is the primary criterion, mean return
# breaks ties between models that reach the same success rate.
save_model = False
if success_rate > best_success_rate:
    best_success_rate = success_rate
    # Re-baseline the tie-breaker at the new success level. Without this,
    # later ties would be compared against a mean return recorded at a
    # different (lower) success rate, so a worse model could overwrite the
    # checkpoint or a better one could be rejected.
    best_mean_return = mean_return
    save_model = True
elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
    best_mean_return = mean_return
    save_model = True

if save_model:
    # The model and its vocabulary are saved under the same '_best' suffix.
    utils.save_model(acmodel, args.model + '_best')
    obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
    logger.info("Return {: .2f}; best model is saved".format(mean_return))
Expand Down

0 comments on commit 66bcf6c

Please sign in to comment.