Skip to content

Commit

Permalink
Fixed divide-by-zero error in reward normalization
Browse files: browse the repository at this point in the history
  • Loading branch information
atgambardella authored and soumith committed Feb 23, 2017
1 parent f3a883c commit 6be19f9
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion reinforcement_learning/actor_critic.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,7 +69,7 @@ def finish_episode():
R = r + args.gamma * R
rewards.insert(0, R)
rewards = torch.Tensor(rewards)
rewards = (rewards - rewards.mean()) / rewards.std()
rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
for (action, value), r in zip(saved_actions, rewards):
action.reinforce(r - value.data.squeeze())
value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
Expand Down
2 changes: 1 addition & 1 deletion reinforcement_learning/reinforce.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,7 +65,7 @@ def finish_episode():
R = r + args.gamma * R
rewards.insert(0, R)
rewards = torch.Tensor(rewards)
rewards = (rewards - rewards.mean()) / rewards.std()
rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
for action, r in zip(model.saved_actions, rewards):
action.reinforce(r)
optimizer.zero_grad()
Expand Down

0 comments on commit 6be19f9

Please sign in to comment.