Replay buffer will normalize rewards by default
shariqiqbal2810 committed Feb 13, 2018
1 parent f3e5720 commit e065185
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions utils/buffer.py
@@ -68,16 +68,23 @@ def push(self, observations, actions, rewards, next_observations, dones):
         if self.curr_i == self.max_steps:
             self.curr_i = 0
 
-    def sample(self, N, to_gpu=False):
+    def sample(self, N, to_gpu=False, norm_rews=True):
         inds = np.random.choice(np.arange(self.filled_i), size=N,
                                 replace=False)
         if to_gpu:
             cast = lambda x: Variable(Tensor(x), requires_grad=False).cuda()
         else:
             cast = lambda x: Variable(Tensor(x), requires_grad=False)
+        if norm_rews:
+            ret_rews = [cast((self.rew_buffs[i][inds] -
+                              self.rew_buffs[i][:self.filled_i].mean()) /
+                             self.rew_buffs[i][:self.filled_i].std())
+                        for i in range(self.num_agents)]
+        else:
+            ret_rews = [cast(self.rew_buffs[i][inds]) for i in range(self.num_agents)]
         return ([cast(self.obs_buffs[i][inds]) for i in range(self.num_agents)],
                 [cast(self.ac_buffs[i][inds]) for i in range(self.num_agents)],
-                [cast(self.rew_buffs[i][inds]) for i in range(self.num_agents)],
+                ret_rews,
                 [cast(self.next_obs_buffs[i][inds]) for i in range(self.num_agents)],
                 [cast(self.done_buffs[i][inds]) for i in range(self.num_agents)])
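
For context: with norm_rews=True (now the default), each agent's sampled rewards are standardized using the mean and standard deviation computed over that agent's entire filled buffer, not just the sampled batch, so the reward scale stays consistent across batches. Below is a minimal NumPy sketch of the same computation; the toy sizes and random data are hypothetical, while rew_buffs, filled_i, inds, and num_agents mirror the names in the diff.

import numpy as np

# Hypothetical toy setup mirroring the buffer's internals.
num_agents = 2
max_steps = 1000
filled_i = 100                       # entries actually written so far
rew_buffs = [np.random.randn(max_steps) for _ in range(num_agents)]

# Sample a batch of indices from the filled region, as sample() does.
inds = np.random.choice(np.arange(filled_i), size=32, replace=False)

# Standardize each agent's batch rewards with buffer-wide statistics.
norm_rews = [(rew_buffs[i][inds] - rew_buffs[i][:filled_i].mean()) /
             rew_buffs[i][:filled_i].std()
             for i in range(num_agents)]

One caveat worth noting: std() over the filled region can be zero when the buffer holds only a few identical rewards, which would make this division blow up; callers can pass norm_rews=False to opt out.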

