Skip to content

Commit

Permalink
test bot advisor on eval mode - fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
saleml committed Dec 14, 2018
1 parent 4969fae commit a6761ce
Showing 1 changed file with 77 additions and 2 deletions.
79 changes: 77 additions & 2 deletions scripts/eval_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,25 @@
"""
Evaluate the success rate of the bot
This script is used for testing/debugging purposes
Examples of usage:
- Run the bot on the GoTo level 10 times (seeds 9 to 18)
eval_bot.py --level GoTo --num_runs 10 --seed 9
- For all levels, 100 times, run a RandomAgent (default seed 1) for max_steps // 3 steps before running the bot:
eval_bot.py --advise_mode --num_runs 100
- For all levels, 500 times, during the first 10 steps, choose the action from a RandomAgent (seed 9) with
probability .9 or the optimal action (from the bot) with probability .1, then continue with optimal bot actions:
eval_bot.py --advise_mode --num_runs 500 --bad_action_proba .9 --non_optimal_steps 10 --random_agent_seed 9
"""

import random
import time
from optparse import OptionParser
from babyai.levels import level_dict
from babyai.bot import Bot
from babyai.bot import Bot, DisappearedBoxError
from babyai.utils.agent import ModelAgent, RandomAgent
import numpy as np

level_list = [
'OpenRedDoor',
Expand Down Expand Up @@ -39,6 +51,34 @@
"--level",
default=None
)
parser.add_option(
"--advise_mode",
action='store_true',
default=False,
help='If specified, a RandomAgent or ModelAgent will act first, then the bot will take over')
parser.add_option(
"--non_optimal_steps",
type=int,
default=None,
help='Number of non bot steps ModelAgent or RandomAgent takes before letting the bot take over'
)
parser.add_option(
"--model",
default=None,
help='Model to use to act for a few steps before letting the bot take over'
)
parser.add_option(
"--random_agent_seed",
type="int",
default=1,
help='Seed of the random agent that acts a few steps before letting the bot take over'
)
parser.add_option(
"--bad_action_proba",
type="float",
default=1.,
help='Probability of performing the non-optimal action when the random/model agent is performing'
)
parser.add_option(
"--seed",
type="int",
Expand All @@ -58,6 +98,15 @@
if options.level:
level_list = [options.level]

bad_agent = None
if options.advise_mode:
if options.model:
bad_agent = ModelAgent(options.model, obss_preprocessor=None,
argmax=True)
else:
bad_agent = RandomAgent(seed=options.random_agent_seed,
env=level_dict[level_list[0]]())

start_time = time.time()

for level_name in level_list:
Expand All @@ -76,10 +125,32 @@
if options.verbose:
print('%s/%s: %s, seed=%d' % (run_no+1, options.num_runs, mission.surface, mission_seed))

optimal_actions = []
before_optimal_actions = []
non_optimal_steps = options.non_optimal_steps or int(mission.max_steps // 3)
np.random.seed(mission_seed)

try:
episode_steps = 0
while True:
action = expert.get_action()
if options.advise_mode and episode_steps < non_optimal_steps:
if np.random.rand() < options.bad_action_proba:
while True:
action = bad_agent.act(mission.gen_obs())['action'].item()

# To make things simple, only allow random left/right/fwd moves, and opening of doors
if action in (mission.actions.left, mission.actions.right, mission.actions.forward):
break
fwd_pos = mission.agent_pos + mission.dir_vec
fwd_cell = mission.grid.get(*fwd_pos)
if action == mission.actions.toggle and fwd_cell is not None and fwd_cell.type == 'door':
break

before_optimal_actions.append(action)
else:
optimal_actions.append(action)

expert.take_action(action)
obs, reward, done, info = mission.step(action)

Expand All @@ -91,11 +162,15 @@
num_success += 1
total_steps += episode_steps
if reward <= 0:
print('FAILURE on %s, seed %d, reward %.2f' % (level_name, mission_seed, reward))
assert episode_steps == mission.max_steps # Is there another reason for this to happen ?
if options.verbose:
print('FAILURE on %s, seed %d, reward %.2f' % (level_name, mission_seed, reward))
break
except Exception as e:
print('FAILURE on %s, seed %d' % (level_name, mission_seed))
print(e)
# Playing these 2 sets of actions should get you to the mission snapshot above
print(before_optimal_actions, optimal_actions)

success_rate = 100 * num_success / options.num_runs
mean_reward = total_reward / options.num_runs
Expand Down

0 comments on commit a6761ce

Please sign in to comment.