Skip to content

Commit

Permalink
Merge pull request mila-iqia#19 from mila-udem/dima-byebye-gotoadjpos
Browse files Browse the repository at this point in the history
fix several bugs in the bot
  • Loading branch information
maximecb authored Jan 22, 2019
2 parents 07ddcd0 + d0ff7ce commit f63d2bb
Show file tree
Hide file tree
Showing 8 changed files with 371 additions and 360 deletions.
7 changes: 4 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ script:
# Quickly exercise the RL training code
- time python3 -m scripts.train_rl --env BabyAI-GoToObj-v0 --algo ppo --procs 4 --batch-size 80 --log-interval 1 --save-interval 2 --val-episodes 10 --frames 300 --arch cnn1 --instr-dim 16 --image-dim 16 --memory-dim 16

# Check that the bot works on Boss Level
- python3 -m scripts.eval_bot --level BossLevel --num_runs 50 | grep "100.0%"
- python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3 | grep "100.0%"
# Check that the bot works on a few episodes of Boss Level and one episode of each level
- python3 -m scripts.eval_bot --level BossLevel --num_runs 50
- python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3
- python3 -m scripts.eval_bot --num_runs 1

# Quickly test the generation of bot demos
- python3 -m scripts.make_agent_demos --env BabyAI-GoToRedBallGrey-v0 --episodes 100 --valid-episodes 32
Expand Down
540 changes: 213 additions & 327 deletions babyai/bot.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions babyai/levels/bonus_levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,9 @@ def __init__(self, seed=None):
seed=seed
)

for name, level in list(globals().items()):
if name.startswith('Level_'):
level.is_bonus = True

# Register the levels in this file
register_levels(__name__, globals())
4 changes: 4 additions & 0 deletions babyai/levels/levelgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ def validate_instrs(self, instr):
instr.reset_verifier(self)

# Check that the objects are not already next to each other
if set(instr.desc_move.obj_set).intersection(
set(instr.desc_fixed.obj_set)):
raise RejectSampling(
"there are objects that match both lhs and rhs of PutNext")
if instr.objs_next():
raise RejectSampling('objs already next to each other')

Expand Down
123 changes: 120 additions & 3 deletions babyai/levels/test_levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Level_TestGoToBlocked(RoomGridLevel):
Go to a yellow ball that is blocked with a lot of red balls.
"""

def __init__(self, room_size=8, seed=None):
def __init__(self, seed=None):
super().__init__(
num_rows=1,
num_cols=1,
Expand All @@ -32,7 +32,7 @@ def gen_mission(self):
for i in (1, 2, 3):
for j in (1, 2, 3):
if (i, j) not in [(1 ,1), (3, 3)]:
self.grid.set(i, j, Ball('red'))
self.place_obj(Ball('red'), (i, j), (1, 1))
self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))


Expand All @@ -42,7 +42,7 @@ class Level_TestPutNextToBlocked(RoomGridLevel):
Pick up a yellow ball and put it next to a blocked blue ball.
"""

def __init__(self, room_size=8, seed=None):
def __init__(self, seed=None):
super().__init__(
num_rows=1,
num_cols=1,
Expand All @@ -64,4 +64,121 @@ def gen_mission(self):
ObjDesc(obj2.type, obj2.color))


class Level_TestPutNextToCloseToDoor1(RoomGridLevel):
    """
    Bot test level: a yellow ball has to be put next to a blue ball
    while the blue ball sits right next to a door.

    The mission combines, through BeforeInstr, opening the red door
    and the PutNext of the yellow ball next to the blue ball.
    """

    def __init__(self, seed=None):
        # Two rooms stacked in a single column, each of size 9
        layout = dict(num_rows=2, num_cols=1, room_size=9)
        super().__init__(seed=seed, **layout)

    def gen_mission(self):
        # Fixed agent start position and direction
        self.start_pos = np.array([3, 3])
        self.start_dir = 0

        red_door, door_pos = self.add_door(0, 0, None, 'red', False)

        # Kept as attributes: the subclass rebuilds its mission from them
        self.obj1 = Ball('yellow')
        self.obj2 = Ball('blue')

        # The (1, 1) size presumably pins each object to the exact cell
        # given -- NOTE(review): confirm against place_obj
        single_cell = (1, 1)
        self.place_obj(self.obj1, (4, 4), single_cell)
        # Blue ball: door position shifted by one along the second coordinate
        beside_door = (door_pos[0], door_pos[1] + 1)
        self.place_obj(self.obj2, beside_door, single_cell)

        open_door = OpenInstr(ObjDesc('door', red_door.color))
        put_next = PutNextInstr(
            ObjDesc(self.obj1.type, self.obj1.color),
            ObjDesc(self.obj2.type, self.obj2.color)
        )
        self.instrs = BeforeInstr(open_door, put_next)


class Level_TestPutNextToCloseToDoor2(Level_TestPutNextToCloseToDoor1):
    """
    Same world as Level_TestPutNextToCloseToDoor1 (the blue ball is
    right next to a door), but the mission is only the PutNext of the
    yellow ball next to the blue ball, with no instruction to open
    the door.
    """

    def gen_mission(self):
        # Let the parent build the world, then replace the mission
        super().gen_mission()
        moved = ObjDesc(self.obj1.type, self.obj1.color)
        target = ObjDesc(self.obj2.type, self.obj2.color)
        self.instrs = PutNextInstr(moved, target)



class Level_TestPutNextToIdentical(RoomGridLevel):
    """
    Check that the agent does not hesitate forever between two
    identical objects.

    A yellow box, a blue ball and a red ball are placed; the second
    PutNext instruction uses a ball description whose colour is None.
    """

    def __init__(self, seed=None):
        # A single room of size 9
        layout = dict(num_rows=1, num_cols=1, room_size=9)
        super().__init__(seed=seed, **layout)

    def gen_mission(self):
        # Fixed agent start position and direction
        self.start_pos = np.array([3, 3])
        self.start_dir = 0

        # (object class, colour, cell); each object is built right
        # before it is placed, in this order
        placements = (
            (Box, 'yellow', (1, 1)),
            (Ball, 'blue', (4, 4)),
            (Ball, 'red', (2, 2)),
        )
        for make_obj, color, cell in placements:
            self.place_obj(make_obj(color), cell, (1, 1))

        # Every instruction gets its own ObjDesc instances
        first = PutNextInstr(ObjDesc('ball', 'blue'),
                             ObjDesc('box', 'yellow'))
        second = PutNextInstr(ObjDesc('box', 'yellow'),
                              ObjDesc('ball', None))
        self.instrs = BeforeInstr(first, second)


class Level_TestUnblockingLoop(RoomGridLevel):
    """
    Test that unblocking does not result in an infinite loop.

    The world is a 2x2 grid of rooms of size 9 with two red doors and
    one blue door. The mission combines, through BeforeInstr, putting
    the blue key next to the blue door with going to both the yellow
    ball and the yellow box.
    """

    def __init__(self, seed=None):
        super().__init__(
            num_rows=2,
            num_cols=2,
            room_size=9,
            seed=seed
        )

    def gen_mission(self):
        # Fixed agent start position and direction
        self.start_pos = np.array([15, 4])
        self.start_dir = 2

        # The doors are only referred to by description in the mission,
        # so the values returned by add_door are not kept
        self.add_door(0, 0, 1, 'red', False)
        self.add_door(0, 1, 0, 'red', False)
        self.add_door(1, 1, 3, 'blue', False)

        # The (1, 1) size presumably pins each object to the exact cell
        # given -- NOTE(review): confirm against place_obj
        self.place_obj(Box('yellow'), (9, 1), (1, 1))
        self.place_obj(Ball('blue'), (5, 3), (1, 1))
        self.place_obj(Ball('yellow'), (6, 2), (1, 1))
        self.place_obj(Key('blue'), (15, 15), (1, 1))

        put = PutNextInstr(ObjDesc('key', 'blue'), ObjDesc('door', 'blue'))
        goto1 = GoToInstr(ObjDesc('ball', 'yellow'))
        goto2 = GoToInstr(ObjDesc('box', 'yellow'))
        self.instrs = BeforeInstr(put, AndInstr(goto1, goto2))


class Level_TestPutNextCloseToDoor(RoomGridLevel):
    """
    Test a PutNext mission: put the yellow box next to the blue ball.

    The world is a 2x2 grid of rooms of size 9 with two red doors and
    one blue door. Judging by the class name, the blue ball is meant to
    end up close to a door -- NOTE(review): confirm against the door
    positions chosen by add_door.
    """

    def __init__(self, seed=None):
        super().__init__(
            num_rows=2,
            num_cols=2,
            room_size=9,
            seed=seed
        )

    def gen_mission(self):
        # Fixed agent start position and direction
        self.start_pos = np.array([15, 4])
        self.start_dir = 2

        # The doors are not referenced by the mission, so the values
        # returned by add_door are not kept
        self.add_door(0, 0, 1, 'red', False)
        self.add_door(0, 1, 0, 'red', False)
        self.add_door(1, 1, 3, 'blue', False)

        # The (1, 1) size presumably pins each object to the exact cell
        # given -- NOTE(review): confirm against place_obj
        self.place_obj(Ball('blue'), (1, 7), (1, 1))
        self.place_obj(Box('yellow'), (3, 15), (1, 1))

        self.instrs = PutNextInstr(ObjDesc('box', 'yellow'),
                                   ObjDesc('ball', 'blue'))


register_levels(__name__, globals())
2 changes: 1 addition & 1 deletion babyai/levels/verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def pos_next_to(pos_a, pos_b):
xa, ya = pos_a
xb, yb = pos_b
d = abs(xa - xb) + abs(ya - yb)
return d < 2
return d == 1


class ObjDesc:
Expand Down
46 changes: 20 additions & 26 deletions scripts/eval_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,11 @@
from babyai.utils.agent import ModelAgent, RandomAgent
from random import Random

level_list = [
'OpenRedDoor',
'GoToLocal',
'PutNextLocal',

'GoToObjMaze',
'GoTo',
'Open',
'Pickup',
'PickupLoc',
'PutNext',

'Unlock',
'GoToImpUnlock',
'UnblockPickup',

'GoToSeq',
'Synth',
'SynthLoc',
'SynthSeq',
'BossLevel',
]

# MiniBossLevel is the only level the bot currently can't always handle
level_list = [name for name, level in level_dict.items()
if not getattr(level, 'is_bonus', False) and name != 'MiniBossLevel']


parser = OptionParser()
parser.add_option(
Expand Down Expand Up @@ -108,11 +91,14 @@

start_time = time.time()

all_good = True

for level_name in level_list:

num_success = 0
total_reward = 0
total_steps = 0
total_steps = []
total_bfs_steps = 0

for run_no in range(options.num_runs):
level = level_dict[level_name]
Expand Down Expand Up @@ -157,9 +143,10 @@
episode_steps += 1

if done:
total_bfs_steps += expert.bfs_counter
if reward > 0:
num_success += 1
total_steps += episode_steps
total_steps.append(episode_steps)
if reward <= 0:
assert episode_steps == mission.max_steps # Is there another reason for this to happen ?
if options.verbose:
Expand All @@ -171,12 +158,19 @@
# Playing these 2 sets of actions should get you to the mission snapshot above
print(before_optimal_actions, optimal_actions)

all_good = all_good and (num_success == options.num_runs)

success_rate = 100 * num_success / options.num_runs
mean_reward = total_reward / options.num_runs
mean_steps = total_steps / options.num_runs
mean_steps = sum(total_steps) / options.num_runs

print('%16s: %.1f%%, r=%.3f, s=%.2f' % (level_name, success_rate, mean_reward, mean_steps))

# Uncomment the following line to print the number of steps per episode (useful to look for episodes to debug)
# print({options.seed + num_run: total_steps[num_run] for num_run in range(options.num_runs)})
end_time = time.time()
total_time = end_time - start_time
print('total time: %.1fs' % total_time)
if not all_good:
raise Exception("some tests failed")
print(total_bfs_steps)

6 changes: 6 additions & 0 deletions scripts/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,12 @@ def showEnv(self, obs):
# UNCOMMENT THE FOLLOWING LINE TO DEBUG THE BOT
self.missionBox.append('\nOptimal Bot Advisor Stack: {}'.format(self.bot_advisor_agent.bot.stack))

self.missionBox.append("""\n"""
"""Arrow Keys: go left, right, up, down\n"""
"""PageUp, PageDown: pick and drop\n"""
"""Backspace: reset\n"""
"""Shift: let the bot act""")

# Set the steps remaining
stepsRem = unwrapped.steps_remaining
self.stepsLabel.setText(str(stepsRem))
Expand Down

0 comments on commit f63d2bb

Please sign in to comment.