Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/maximecb/baby-ai-game
Browse files Browse the repository at this point in the history
  • Loading branch information
lcswillems committed Jun 28, 2018
2 parents 7bbd16b + f2e8b3a commit b3a2586
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 17 deletions.
190 changes: 190 additions & 0 deletions babyai/levels/levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,68 @@ def gym_id(self):
return self.__class__.gym_id


class RoomGridLevelHC(RoomGrid):
    """
    Base class for RoomGrid levels whose success check is a plain callable.

    Given a random seed, a level produces missions drawn from one or more
    patterns; all missions of a level should be of roughly the same
    difficulty.

    Subclasses implement gen_mission(), which is expected to set:
      - self.surface: the instruction text (copied into self.mission)
      - self.verifier: a callable invoked as verifier(env, action) after
        every step; a truthy result ends the episode with a reward
    """

    def __init__(self, room_size=6, max_steps=None, **kwargs):
        # Without an explicit step budget, scale it with the room area
        step_budget = 4 * (room_size ** 2) if max_steps is None else max_steps

        super().__init__(room_size=room_size, max_steps=step_budget, **kwargs)

    def reset(self, **kwargs):
        """Reset the environment and return the first observation."""
        return super().reset(**kwargs)

    def step(self, action):
        """
        Advance the environment by one action, then query the verifier.
        When the verifier reports success, the episode is flagged as done
        and the reward is replaced by self._reward().
        """
        obs, reward, done, info = super().step(action)

        # Mission not (yet) accomplished: pass the parent's result through
        if not self.verifier(self, action):
            return obs, reward, done, info

        return obs, self._reward(), True, info

    def _gen_grid(self, width, height):
        """Build the grid, generate the mission and publish its text."""
        super()._gen_grid(width, height)

        # gen_mission() is expected to define self.surface
        self.gen_mission()
        self.mission = self.surface

    def gen_mission(self):
        """
        Generate a mission (instructions and matching environment).
        Must be overridden by derived level classes.
        """
        raise NotImplementedError

    @property
    def level_name(self):
        # Looked up on the class, not on the instance
        cls = self.__class__
        return cls.level_name

    @property
    def gym_id(self):
        # Looked up on the class, not on the instance
        cls = self.__class__
        return cls.gym_id


class Level_OpenRedDoor(RoomGridLevel):
"""
Go to the red door
Expand Down Expand Up @@ -882,6 +944,134 @@ def __init__(self, seed=None):
)


def pos_next_to(a, b):
    """
    Tell whether two (x, y) positions are within one cell of each other.

    True when both coordinates differ by less than 2, i.e. the positions
    are identical or touch horizontally, vertically or diagonally.
    """
    (ax, ay), (bx, by) = a, b
    close_in_x = abs(ax - bx) < 2
    return close_in_x and abs(ay - by) < 2


def verify_put_next(obj_x, obj_y):
    """
    Build a verifier that succeeds once obj_x's current position is next
    to the position obj_y had initially (obj_y.init_pos, not its current
    position).
    """
    def verifier(env, action):
        # env and action are part of the verifier signature but unused here
        moved_pos = obj_x.cur_pos
        anchor_pos = obj_y.init_pos
        return pos_next_to(moved_pos, anchor_pos)

    return verifier


def verify_both(verify_a, verify_b):
    """
    Combine two verifiers into one that requires both to succeed.

    verify_b is only consulted when verify_a returned a truthy value; the
    combined verifier returns the falsy result of verify_a, otherwise
    whatever verify_b returns.
    """
    def verifier(env, action):
        first = verify_a(env, action)
        if not first:
            return first
        return verify_b(env, action)

    return verifier


class Level_PutNext(RoomGridLevelHC):
    """
    Put an object next to another object.
    A single room holds many objects, so the number of possible
    instructions is potentially large.
    """

    def __init__(self, room_size=8, num_objs=8, seed=None):
        assert num_objs >= 5, "no guarantee that non-adjacent objects exist with N < 5"

        # Stored before the parent constructor runs
        self.num_objs = num_objs

        super().__init__(num_rows=1, num_cols=1, room_size=room_size, seed=seed)

    def gen_mission(self):
        """
        Place the agent and the objects, pick the pair of objects the
        instruction is about, and set self.surface and self.verifier.
        """
        self.place_agent(0, 0)

        self.add_distractors(self.num_objs)
        room_objs = self.get_room(0, 0).objs

        # Resample until the two selected objects do not start out adjacent
        moved, anchor = self._rand_subset(room_objs, 2)
        while pos_next_to(moved.init_pos, anchor.init_pos):
            moved, anchor = self._rand_subset(room_objs, 2)

        descr = (moved.color, moved.type, anchor.color, anchor.type)
        self.surface = "put the %s %s next to the %s %s" % descr

        self.verifier = verify_put_next(moved, anchor)


class Level_PutNextS6N5(Level_PutNext):
    """Level_PutNext preset with room_size=6 and num_objs=5."""

    def __init__(self, seed=None):
        super().__init__(room_size=6, num_objs=5, seed=seed)


class Level_PutNextS7N5(Level_PutNext):
    """Level_PutNext preset with room_size=7 and num_objs=5."""

    def __init__(self, seed=None):
        super().__init__(room_size=7, num_objs=5, seed=seed)


class Level_PutNextS8N6(Level_PutNext):
    """Level_PutNext preset with room_size=8 and num_objs=6."""

    def __init__(self, seed=None):
        super().__init__(room_size=8, num_objs=6, seed=seed)


class Level_PutTwoNext(RoomGridLevelHC):
    """
    Put two objects next to a third object.
    A single room holds many objects, so the number of possible
    instructions is potentially large.
    """

    def __init__(self, room_size=8, num_objs=8, seed=None):
        # Stored before the parent constructor runs
        self.num_objs = num_objs

        super().__init__(num_rows=1, num_cols=1, room_size=room_size, seed=seed)

    def gen_mission(self):
        """
        Place the agent and the objects, pick the two objects to move and
        the object they must end up next to, and set self.surface and
        self.verifier.
        """
        self.place_agent(0, 0)
        self.add_distractors(self.num_objs)
        room_objs = self.get_room(0, 0).objs

        # NOTE(review): unlike Level_PutNext, the selection is not resampled
        # when the chosen objects already start out adjacent -- confirm
        # whether an initially satisfied mission is acceptable here.
        first, second, anchor = self._rand_subset(room_objs, 3)

        descr = (
            first.color, first.type,
            second.color, second.type,
            anchor.color, anchor.type,
        )
        self.surface = "put the %s %s and the %s %s next to the %s %s" % descr

        # Both moved objects must end up next to the anchor's initial position
        first_ok = verify_put_next(first, anchor)
        second_ok = verify_put_next(second, anchor)
        self.verifier = verify_both(first_ok, second_ok)


# Dictionary of levels, indexed by name, lexically sorted
level_dict = OrderedDict()

Expand Down
10 changes: 3 additions & 7 deletions babyai/levels/verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def dot_product(v1, v2):
class Verifier(ABC):
def __init__(self, env):
self.env = env
self.startDirVec = env.get_dir_vec()
self.startDirVec = env.dir_vec

@abstractmethod
def step(self):
Expand Down Expand Up @@ -46,7 +46,7 @@ def _obj_desc_to_poss(self, obj_desc):
type = cell.type
state = None

# Check if object's type matches description
# Check if object's type matches description
if obj_desc.type != None and type != obj_desc.type:
continue

Expand Down Expand Up @@ -88,11 +88,7 @@ def _get_in_front_of_pos(self):
The agent's state is the 2-tuple (agent_dir, agent_pos).
"""

pos = self.env.agent_pos
d = self.env.get_dir_vec()
pos = (pos[0] + d[0], pos[1] + d[1])

return pos
return self.env.front_pos


class InstrSeqVerifier(Verifier):
Expand Down
27 changes: 18 additions & 9 deletions scripts/make_agent_demos.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,34 @@
help="action with highest probability is selected")
parser.add_argument("--save-interval", type=int, default=0,
help="interval between demonstrations saving (default: 0, 0 means only at the end)")
parser.add_argument("--filter-steps", type=int, default=0,
help="filter out demos with number of steps more than filter-steps (default: 0, No filtering)")
parser.add_argument("--valid", action="store_true", default=False,
help="generating demonstrations for validation set")

args = parser.parse_args()

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)

# Select Origin
origin = "agent" if not args.valid else "agent_valid"
assert not(args.valid) or args.seed == 0

# Define agent

agent = utils.load_agent(args, env)

# Load demonstrations

demos = utils.load_demos(args.env, "agent")
demos = utils.load_demos(args.env, origin)
utils.synthesize_demos(demos)

for i in range(1, args.episodes+1):
while True:
# Run the expert for one episode

done = False
Expand All @@ -54,16 +61,18 @@

demo.append((obs, action, reward, done))
obs = new_obs

demos.append(demo)
if args.filter_steps is not 0:
if len(demo) <= args.filter_steps and reward != 0:
demos.append(demo)
if len(demos) == args.episodes:
break

# Save demonstrations

if args.save_interval > 0 and i < args.episodes and i % args.save_interval == 0:
utils.save_demos(demos, args.env, "agent")
utils.save_demos(demos, args.env, origin)
utils.synthesize_demos(demos)

# Save demonstrations

utils.save_demos(demos, args.env, "agent")
utils.save_demos(demos, args.env, origin)
utils.synthesize_demos(demos)
1 change: 0 additions & 1 deletion test_mission_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def reset():
mission = level(seed=seed)

print('seed=%d' % seed)
print(mission.instrs)
print(mission.surface)

pixmap = mission.render('pixmap')
Expand Down

0 comments on commit b3a2586

Please sign in to comment.