Skip to content

Commit

Permalink
[rllib] Fix KeyError: 'kl' in multiagent ppo training
Browse files (browse the repository at this point in the history)
  • Loading branch information
ericl authored Jan 10, 2019
1 parent 6fc3fc4 commit 7124320
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions python/ray/rllib/optimizers/multi_gpu_optimizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,7 +131,11 @@ def step(self):
"This may be because you have many workers or "
"long episodes in 'complete_episodes' batch mode.")
else:
- samples = self.local_evaluator.sample()
+ samples = []
+ while sum(s.count for s in samples) < self.train_batch_size:
+     samples.append(self.local_evaluator.sample())
+ samples = SampleBatch.concat_samples(samples)

# Handle everything as if multiagent
if isinstance(samples, SampleBatch):
samples = MultiAgentBatch({
Expand Down Expand Up @@ -174,7 +178,8 @@ def step(self):
with self.grad_timer:
for policy_id, tuples_per_device in num_loaded_tuples.items():
optimizer = self.optimizers[policy_id]
- num_batches = (
+ num_batches = max(
+     1,
int(tuples_per_device) // int(self.per_device_batch_size))
logger.debug("== sgd epochs for {} ==".format(policy_id))
for i in range(self.num_sgd_iter):
Expand Down

0 comments on commit 7124320

Please sign in to comment.