Merge pull request mila-iqia#19 from mila-udem/dima-byebye-gotoadjpos

fix several bugs in the bot
mattdeitke · Jan 22, 2019 · f63d2bb · f63d2bb
2 parents 07ddcd0 + d0ff7ce
commit f63d2bb
Show file tree

Hide file tree

Showing 8 changed files with 371 additions and 360 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -20,9 +20,10 @@ script:
     # Quickly exercise the RL training code
     - time python3 -m scripts.train_rl --env BabyAI-GoToObj-v0 --algo ppo --procs 4 --batch-size 80 --log-interval 1 --save-interval 2 --val-episodes 10 --frames 300 --arch cnn1 --instr-dim 16 --image-dim 16 --memory-dim 16
 
-    # Check that the bot works on Boss Level
-    - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 | grep "100.0%"
-    - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3 | grep "100.0%"
+    # Check that the bot works on a few episodes of Boss Level and one episode of each level
+    - python3 -m scripts.eval_bot --level BossLevel --num_runs 50
+    - python3 -m scripts.eval_bot --level BossLevel --num_runs 50 --advise_mode --non_optimal_steps 100 --bad_action_proba .3
+    - python3 -m scripts.eval_bot --num_runs 1
 
     # Quickly test the generation of bot demos
     - python3 -m scripts.make_agent_demos --env BabyAI-GoToRedBallGrey-v0 --episodes 100 --valid-episodes 32

diff --git a/babyai/bot.py b/babyai/bot.py
diff --git a/babyai/levels/bonus_levels.py b/babyai/levels/bonus_levels.py
@@ -1009,6 +1009,9 @@ def __init__(self, seed=None):
             seed=seed
         )
 
+for name, level in list(globals().items()):
+    if name.startswith('Level_'):
+        level.is_bonus = True
 
 # Register the levels in this file
 register_levels(__name__, globals())
diff --git a/babyai/levels/levelgen.py b/babyai/levels/levelgen.py
@@ -120,6 +120,10 @@ def validate_instrs(self, instr):
             instr.reset_verifier(self)
 
             # Check that the objects are not already next to each other
+            if set(instr.desc_move.obj_set).intersection(
+                    set(instr.desc_fixed.obj_set)):
+                raise RejectSampling(
+                    "there are objects that match both lhs and rhs of PutNext")
             if instr.objs_next():
                 raise RejectSampling('objs already next to each other')
 

diff --git a/babyai/levels/test_levels.py b/babyai/levels/test_levels.py
@@ -15,7 +15,7 @@ class Level_TestGoToBlocked(RoomGridLevel):
     Go to a yellow ball that is blocked with a lot of red balls.
     """
 
-    def __init__(self, room_size=8, seed=None):
+    def __init__(self, seed=None):
         super().__init__(
             num_rows=1,
             num_cols=1,
@@ -32,7 +32,7 @@ def gen_mission(self):
         for i in (1, 2, 3):
             for j in (1, 2, 3):
                 if (i, j) not in [(1 ,1), (3, 3)]:
-                    self.grid.set(i, j, Ball('red'))
+                    self.place_obj(Ball('red'), (i, j), (1, 1))
         self.instrs = GoToInstr(ObjDesc(obj.type, obj.color))
 
 
@@ -42,7 +42,7 @@ class Level_TestPutNextToBlocked(RoomGridLevel):
     Pick up a yellow ball and put it next to a blocked blue ball.
     """
 
-    def __init__(self, room_size=8, seed=None):
+    def __init__(self, seed=None):
         super().__init__(
             num_rows=1,
             num_cols=1,
@@ -64,4 +64,121 @@ def gen_mission(self):
                                    ObjDesc(obj2.type, obj2.color))
 
 
+class Level_TestPutNextToCloseToDoor1(RoomGridLevel):
+    """
+    The yellow ball must be put near the blue ball.
+    But the blue ball is right next to a door.
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            num_rows=2,
+            num_cols=1,
+            room_size=9,
+            seed=seed
+        )
+
+    def gen_mission(self):
+        self.start_pos = np.array([3, 3])
+        self.start_dir = 0
+        door, pos = self.add_door(0, 0, None, 'red', False)
+        self.obj1 = Ball('yellow')
+        self.obj2 = Ball('blue')
+        self.place_obj(self.obj1, (4, 4), (1, 1))
+        self.place_obj(self.obj2, (pos[0], pos[1] + 1), (1, 1))
+        self.instrs = BeforeInstr(
+            OpenInstr(ObjDesc('door', door.color)),
+            PutNextInstr(ObjDesc(self.obj1.type, self.obj1.color),
+                         ObjDesc(self.obj2.type, self.obj2.color)))
+
+
+class Level_TestPutNextToCloseToDoor2(Level_TestPutNextToCloseToDoor1):
+    """
+    Same layout as Level_TestPutNextToCloseToDoor1: the yellow ball must be put
+    near the blue ball, which is right next to a door. No door opening is required here.
+    """
+
+    def gen_mission(self):
+        super().gen_mission()
+        self.instrs = PutNextInstr(ObjDesc(self.obj1.type, self.obj1.color),
+                                   ObjDesc(self.obj2.type, self.obj2.color))
+
+
+
+class Level_TestPutNextToIdentical(RoomGridLevel):
+    """
+    Test that the agent does not endlessly hesitate between
+    two identical objects.
+    """
+
+    def __init__(self, seed=None):
+        super().__init__(
+            num_rows=1,
+            num_cols=1,
+            room_size=9,
+            seed=seed
+        )
+
+    def gen_mission(self):
+        self.start_pos = np.array([3, 3])
+        self.start_dir = 0
+        self.place_obj(Box('yellow'), (1, 1), (1, 1))
+        self.place_obj(Ball('blue'), (4, 4), (1, 1))
+        self.place_obj(Ball('red'), (2, 2), (1, 1))
+        instr1 = PutNextInstr(ObjDesc('ball', 'blue'),
+                              ObjDesc('box', 'yellow'))
+        instr2 = PutNextInstr(ObjDesc('box', 'yellow'),
+                              ObjDesc('ball', None))
+        self.instrs = BeforeInstr(instr1, instr2)
+
+
+class Level_TestUnblockingLoop(RoomGridLevel):
+    """Test that unblocking does not result in an infinite loop."""
+
+    def __init__(self, seed=None):
+        super().__init__(
+            num_rows=2,
+            num_cols=2,
+            room_size=9,
+            seed=seed
+        )
+
+    def gen_mission(self):
+        self.start_pos = np.array([15, 4])
+        self.start_dir = 2
+        door, pos = self.add_door(0, 0, 1, 'red', False)
+        door, pos = self.add_door(0, 1, 0, 'red', False)
+        door, pos = self.add_door(1, 1, 3, 'blue', False)
+        self.place_obj(Box('yellow'), (9, 1), (1, 1))
+        self.place_obj(Ball('blue'), (5, 3), (1, 1))
+        self.place_obj(Ball('yellow'), (6, 2), (1, 1))
+        self.place_obj(Key('blue'), (15, 15), (1, 1))
+        put = PutNextInstr(ObjDesc('key', 'blue'), ObjDesc('door', 'blue'))
+        goto1 = GoToInstr(ObjDesc('ball', 'yellow'))
+        goto2 = GoToInstr(ObjDesc('box', 'yellow'))
+        self.instrs = BeforeInstr(put, AndInstr(goto1, goto2))
+
+
+class Level_TestPutNextCloseToDoor(RoomGridLevel):
+    """Test a PutNext mission whose target is (going by the level name) close to a door."""
+
+    def __init__(self, seed=None):
+        super().__init__(
+            num_rows=2,
+            num_cols=2,
+            room_size=9,
+            seed=seed
+        )
+
+    def gen_mission(self):
+        self.start_pos = np.array([15, 4])
+        self.start_dir = 2
+        door, pos = self.add_door(0, 0, 1, 'red', False)
+        door, pos = self.add_door(0, 1, 0, 'red', False)
+        door, pos = self.add_door(1, 1, 3, 'blue', False)
+        self.place_obj(Ball('blue'), (1, 7), (1, 1))
+        self.place_obj(Box('yellow'), (3, 15), (1, 1))
+        self.instrs = PutNextInstr(ObjDesc('box', 'yellow'), ObjDesc('ball', 'blue'))
+
+
 register_levels(__name__, globals())
diff --git a/babyai/levels/verifier.py b/babyai/levels/verifier.py
@@ -35,7 +35,7 @@ def pos_next_to(pos_a, pos_b):
     xa, ya = pos_a
     xb, yb = pos_b
     d = abs(xa - xb) + abs(ya - yb)
-    return d < 2
+    return d == 1
 
 
 class ObjDesc:

diff --git a/scripts/eval_bot.py b/scripts/eval_bot.py
@@ -23,28 +23,11 @@
 from babyai.utils.agent import ModelAgent, RandomAgent
 from random import Random
 
-level_list = [
-    'OpenRedDoor',
-    'GoToLocal',
-    'PutNextLocal',
-
-    'GoToObjMaze',
-    'GoTo',
-    'Open',
-    'Pickup',
-    'PickupLoc',
-    'PutNext',
-
-    'Unlock',
-    'GoToImpUnlock',
-    'UnblockPickup',
-
-    'GoToSeq',
-    'Synth',
-    'SynthLoc',
-    'SynthSeq',
-    'BossLevel',
-]
+
+# MiniBossLevel is the only level the bot currently can't always handle
+level_list = [name for name, level in level_dict.items()
+              if not getattr(level, 'is_bonus', False) and name != 'MiniBossLevel']
+
 
 parser = OptionParser()
 parser.add_option(
@@ -108,11 +91,14 @@
 
 start_time = time.time()
 
+all_good = True
+
 for level_name in level_list:
 
     num_success = 0
     total_reward = 0
-    total_steps = 0
+    total_steps = []
+    total_bfs_steps = 0
 
     for run_no in range(options.num_runs):
         level = level_dict[level_name]
@@ -157,9 +143,10 @@
                 episode_steps += 1
 
                 if done:
+                    total_bfs_steps += expert.bfs_counter
                     if reward > 0:
                         num_success += 1
-                        total_steps += episode_steps
+                        total_steps.append(episode_steps)
                     if reward <= 0:
                         assert episode_steps == mission.max_steps  # Is there another reason for this to happen ?
                         if options.verbose:
@@ -171,12 +158,19 @@
             # Playing these 2 sets of actions should get you to the mission snapshot above
             print(before_optimal_actions, optimal_actions)
 
+    all_good = all_good and (num_success == options.num_runs)
+
     success_rate = 100 * num_success / options.num_runs
     mean_reward = total_reward / options.num_runs
-    mean_steps = total_steps / options.num_runs
+    mean_steps = sum(total_steps) / options.num_runs
 
     print('%16s: %.1f%%, r=%.3f, s=%.2f' % (level_name, success_rate, mean_reward, mean_steps))
-
+    # Uncomment the following line to print the number of steps per episode (useful to look for episodes to debug)
+    # print({options.seed + num_run: total_steps[num_run] for num_run in range(options.num_runs)})
 end_time = time.time()
 total_time = end_time - start_time
 print('total time: %.1fs' % total_time)
+if not all_good:
+    raise Exception("some tests failed")
+print(total_bfs_steps)
+
diff --git a/scripts/gui.py b/scripts/gui.py
@@ -366,6 +366,12 @@ def showEnv(self, obs):
         # UNCOMMENT THE FOLLOWING LINE TO DEBUG THE BOT
         self.missionBox.append('\nOptimal Bot Advisor Stack: {}'.format(self.bot_advisor_agent.bot.stack))
 
+        self.missionBox.append("""\n"""
+            """Arrow Keys: go left, right, up, down\n"""
+            """PageUp, PageDown: pick and drop\n"""
+            """Backspace: reset\n"""
+            """Shift: let the bot act""")
+
         # Set the steps remaining
         stepsRem = unwrapped.steps_remaining
         self.stepsLabel.setText(str(stepsRem))