Skip to content

Commit

Permalink
simplify the code and rename take_action -> replan
Browse files Browse the repository at this point in the history
  • Loading branch information
rizar committed Jan 25, 2019
1 parent b95cbe8 commit 67adc5b
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 59 deletions.
107 changes: 52 additions & 55 deletions babyai/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get_action(self):
subgoals.
Returns an action.
"""
pass
raise NotImplementedError()


def simulate_step(self, action):
Expand Down Expand Up @@ -143,31 +143,19 @@ def foolish_action_while_exploring(self, action):
# need to open it
self.bot.stack.append(OpenSubgoal(self.bot))

def take_action(self, action):
def replan(self, given_action):
"""
Function that updates the bot's stack given the action played.
Should be overridden in all sub-classes.
TODO: There are some steps that are common in both take_action and get_action
TODO: There are some steps that are common in both replan and get_action
for certain subgoals, maybe the bot's speed can be improved if we do them only once
- or at least we can factorize the code a bit
When this function returns `True`, it means no more replanning needs to be done.
When it returns `False`, the bot continues replanning.
"""
self.erroneous_box_opening(action)
self.simulate_step(action)

def erroneous_box_opening(self, action):
"""
When the agent opens a box, we raise an error and mark the task unsolvable.
This is a tad conservative, because maybe the box is irrelevant to the mission.
TODO: We can relax this by checking if the opened box is crucial for the mission.
TODO: We can relax this by checking if a similar box still exists if the box is crucial.
"""
if action == self.actions.toggle and self.fwd_cell is not None and self.fwd_cell.type == 'box':
raise DisappearedBoxError('A box was opened. Too Bad :(')

raise NotImplementedError()

class OpenSubgoal(Subgoal):
def get_action(self):
Expand Down Expand Up @@ -206,9 +194,7 @@ def get_action(self):

return self.actions.toggle

def take_action(self, action):
super().take_action(action)

def replan(self, action):
# CASE 1: The door is locked
# we need to fetch the key and return
# i.e. update the stack REGARDLESS of the action
Expand Down Expand Up @@ -282,11 +268,10 @@ class DropSubgoal(Subgoal):
def get_action(self):
return self.actions.drop

def take_action(self, action):
super().take_action(action)
if action == self.actions.drop:
def replan(self, given_action):
if given_action == self.actions.drop:
self.bot.stack.pop()
elif action in (self.actions.left, self.actions.right, self.actions.forward):
elif given_action in (self.actions.left, self.actions.right, self.actions.forward):
# Go back to where you were to drop what you got
self.bot.stack.append(GoNextToSubgoal(self.bot, tuple(self.fwd_pos)))
# done/pickup actions won't have any effect -> Next step would take us to the same subgoal
Expand All @@ -297,11 +282,10 @@ class PickupSubgoal(Subgoal):
def get_action(self):
return self.actions.pickup

def take_action(self, action):
super().take_action(action)
if action == self.actions.pickup:
def replan(self, given_action):
if given_action == self.actions.pickup:
self.bot.stack.pop()
elif action in (self.actions.left, self.actions.right):
elif given_action in (self.actions.left, self.actions.right):
# Go back to where you were to pickup what was in front of you
self.bot.stack.append(GoNextToSubgoal(self.bot, tuple(self.fwd_pos)))
# done/drop/forward actions won't have any effect -> Next step would take us to the same subgoal
Expand Down Expand Up @@ -471,9 +455,7 @@ def closest_wall_or_door_given_dir(position, direction):
return self.actions.left
return self.actions.right

def take_action(self, action):
super().take_action(action)

def replan(self, given_action):
if isinstance(self.datum, ObjDesc):
target_pos = self.bot.find_obj_pos(self.datum, self.adjacent)
if not target_pos:
Expand All @@ -486,9 +468,9 @@ def take_action(self, action):
# CASE 1: The position we are on is the one we should go next to
# -> Move away from it
if manhattan_distance(target_pos, self.pos) == (1 if self.adjacent else 0):
if action in (self.actions.drop, self.actions.pickup, self.actions.toggle):
if given_action in (self.actions.drop, self.actions.pickup, self.actions.toggle):
# Update the stack if we did something bad, or do nothing if the action doesn't change anything
self.foolish_action_while_exploring(action)
self.foolish_action_while_exploring(given_action)
# Whatever other action, the stack should stay the same, and it's the new action that should be evaluated
# TODO: Double check what happens in this scenario with all actions
return True
Expand All @@ -514,7 +496,7 @@ def take_action(self, action):
# CASE 3: the action taken would lead us to face the target cell
# -> Don't do anything. The stack will be popped at next step anyway
if np.array_equal(target_pos, self.new_fwd_pos):
assert action in (self.actions.forward, self.actions.left, self.actions.right), "Doesn't make sense"
assert given_action in (self.actions.forward, self.actions.left, self.actions.right), "Doesn't make sense"
return True

# CASE 5: otherwise
Expand Down Expand Up @@ -556,7 +538,7 @@ def take_action(self, action):
drop_pos = self.bot.find_drop_pos()
self.bot.stack.append(DropSubgoal(self.bot))
self.bot.stack.append(GoNextToSubgoal(self.bot, drop_pos))
if action == self.actions.pickup:
if given_action == self.actions.pickup:
return True
else:
self.bot.stack.append(PickupSubgoal(self.bot))
Expand All @@ -567,8 +549,8 @@ def take_action(self, action):
# TODO: what if I pickup another blocker (and that's good)

# CASE 5.4: If there is nothing blocking us and we drop/pickup/toggle something for no reason
if action in (self.actions.drop, self.actions.pickup, self.actions.toggle):
self.foolish_action_while_exploring(action)
if given_action in (self.actions.drop, self.actions.pickup, self.actions.toggle):
self.foolish_action_while_exploring(given_action)

# CASE 5.5: If we are GoingNextTo something because of exploration
if self.reason == 'Explore':
Expand Down Expand Up @@ -620,9 +602,7 @@ def unopened_door(pos, cell):

assert False, "0nothing left to explore"

def take_action(self, action):
super().take_action(action)

def replan(self, given_action):
# Find the closest unseen position
_, unseen_pos, with_blockers = self.bot.shortest_path(
lambda pos, cell: not self.bot.vis_mask[pos],
Expand Down Expand Up @@ -1052,33 +1032,50 @@ def get_action(self):
return self.bot.mission.actions.done
self.stack.pop()

def take_action(self, action):
def replan(self, given_action):
"""
Update agent's internal state. Should always be called after get_action() and before env.step()
Update the subgoal stack given the action that will be taken by the agent.
The algorithm is as follows: the top subgoal's `replan` method called,
until a moment when a subgoal that has already been at the top of the stack
pops up.
"""

self.step_count += 1

finished_updating = False
while not finished_updating:
empty_stack = self.empty_stack_update(action)
if not empty_stack:
subgoal = self.stack[-1]
subgoal.update_agent_attributes()
finished_updating = subgoal.take_action(action)
else:
finished_updating = True
seen_stack_tops = set()
while True:
if not self.stack:
self.empty_stack_update()
break

subgoal = self.stack[-1]
seen_stack_tops.add(subgoal)

subgoal.update_agent_attributes()
subgoal.simulate_step(given_action)
if subgoal.replan(given_action):
break

def empty_stack_update(self, action):
pos = self.mission.agent_pos
dir_vec = self.mission.dir_vec
fwd_pos = pos + dir_vec
self.stack.append(GoNextToSubgoal(self, fwd_pos))

if len(self.stack) == 0:
if action != self.mission.actions.done:
self.stack.append(GoNextToSubgoal(self, fwd_pos))
return True
def check_erroneous_box_opening(self, action):
"""
When the agent opens a box, we raise an error and mark the task unsolvable.
This is a tad conservative, because maybe the box is irrelevant to the mission.
TODO: We can relax this by checking if the opened box is crucial for the mission.
TODO: We can relax this by checking if a similar box still exists if the box is crucial.
"""
if (action == self.actions.toggle
and self.fwd_cell is not None
and self.fwd_cell.type == 'box'):
raise DisappearedBoxError('A box was opened. Too Bad :(')

def step(self):
action = self.get_action()
self.take_action(action)
self.replan(action)
2 changes: 1 addition & 1 deletion babyai/utils/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def on_reset(self):
def act(self, obs=None, update_internal_state=True, *args, **kwargs):
action = self.bot.get_action()
if update_internal_state:
self.bot.take_action(action)
self.bot.replan(action)
return {'action': action}

def analyze_feedback(self, reward, done):
Expand Down
2 changes: 1 addition & 1 deletion scripts/eval_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
else:
optimal_actions.append(action)

expert.take_action(action)
expert.replan(action)
obs, reward, done, info = mission.step(action)

total_reward += reward
Expand Down
2 changes: 1 addition & 1 deletion scripts/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def stepEnv(self, action=None):
if action is None:
action = self.bot_advisor_action

self.bot_advisor_agent.bot.take_action(action)
self.bot_advisor_agent.bot.replan(action)
obs, reward, done, info = self.env.step(action)

self.showEnv(obs)
Expand Down
2 changes: 1 addition & 1 deletion scripts/train_intelligent_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def generate_dagger_demos(env_name, seeds, fail_obss, fail_actions, mean_steps):
assert check_obss_equality(obs, new_obs), "Observations {} of seed {} don't match".format(j, seeds[i])
mission = obs['mission']
action = agent.act(update_internal_state=False)['action']
_ = agent.bot.take_action(fail_actions[i][j])
_ = agent.bot.replan(fail_actions[i][j])
debug_info['actions'].append(fail_actions[i][j])
new_obs, reward, done, _ = env.step(fail_actions[i][j])
if done and reward > 0:
Expand Down

0 comments on commit 67adc5b

Please sign in to comment.