Modified MuJoCo env to be compatible with MuJoCo200 #11

Open · wants to merge 8 commits into base: `master`
3 changes: 3 additions & 0 deletions .gitignore
@@ -103,3 +103,6 @@ ENV/
.mypy_cache/

data/
/.pytest_cache/

.idea
39 changes: 36 additions & 3 deletions README.md
@@ -6,7 +6,7 @@ This python package is an extension to OpenAI Gym for auxiliary tasks (multitask

## Dependencies

- [Python 3.5.2](https://www.python.org/) or [Python 3.6.x](https://www.python.org/)
- [OpenAI Gym](https://gym.openai.com/)
- [MuJoCo](http://mujoco.org/) (Optional)
- [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco) (Optional)
@@ -36,9 +36,20 @@ at `~/.mujoco/mjkey.txt`
pip3 install -e .[mujoco]
```

#### MuJoCo Env Naming Rule
See the official [mujoco_py](https://github.com/openai/mujoco-py) repository for details; a short sketch follows the list.
- `env_name-v0`: MuJoCo version < 2.0
- `env_name-v1`: MuJoCo version == 2.0
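
For example (a sketch; `HopperGravityHalf` is one of the gravity variants this package registers, used again in the example section below):

```python
import gym
from gym_extensions.continuous import mujoco

# env = gym.make("HopperGravityHalf-v0")  # mujoco-py builds for MuJoCo < 2.0
env = gym.make("HopperGravityHalf-v1")    # mujoco-py builds for MuJoCo 2.0
```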

### Test Installation
There are two ways to test the installation:
```
# 1. Run the full test suite
$ nosetests -v gym_extensions

# 2. Run the MuJoCo test
$ cd tests
$ python simple_test.py
```

### Possible Issues
@@ -75,10 +86,32 @@ env = gym.make("State-Based-Navigation-2d-Map1-Goal1-v0")
```

```python
import gym
from gym_extensions.continuous import mujoco

# env = gym.make("HopperGravityHalf-v0")  # MuJoCo before ver. 2.0
env = gym.make("HopperGravityHalf-v1")    # MuJoCo ver. 2.0

env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample())  # take a random action
env.close()
```

```python
import gym
import gym_extensions.continuous.mujoco.nervenet_envs.register as register

env = gym.make("WalkersHopperthree-v1")
env.reset()

for _ in range(100 * 3):
    env.render()
    action = env.action_space.sample()
    state, reward, done, info = env.step(action)
env.close()
```

## More info

725 changes: 368 additions & 357 deletions gym_extensions/continuous/mujoco/__init__.py

Large diffs are not rendered by default.

194 changes: 194 additions & 0 deletions gym_extensions/continuous/mujoco/food_envs.py
@@ -0,0 +1,194 @@
import tempfile
import xml.etree.ElementTree as ET
import numpy as np
import random
from gym import utils
import math
import pyrr
from gym_extensions.continuous.mujoco.wall_envs import isclose, rotate_vector

FOOD_BONUS = 100


def SingleFoodEnvFactory(class_type):
    """class_type should be an OpenAI gym type"""

    class SingleFoodEnv(class_type, utils.EzPickle):
        """
        Provides a piece of frictionless food which the agent must seek out in the env.
        A visual observation could be used instead of the installed sensors.
        """

        def __init__(
                self,
                model_path,
                food_height=.25,
                food_pos_range=([1.5, 1.5], [2.5, 2.5]),
                # food_pos_range=([0.5, 0.5], [0.8, 0.8]),  # test purposes
                n_bins=10,
                sensor_range=10.,
                sensor_span=math.pi / 2,
                *args,
                **kwargs):

            # TODO: temporary workaround: OpenAI gym seems to require this,
            # but there may be a better way to specify it.
            self._seed = 0

            self._n_bins = n_bins
            # Add a sensor
            self._sensor_range = sensor_range
            self._sensor_span = sensor_span

            tree = ET.parse(model_path)
            worldbody = tree.find(".//worldbody")

            height = food_height
            self.food_pos_range = food_pos_range
            rand_x = random.uniform(food_pos_range[0][0], food_pos_range[1][0])
            rand_y = random.uniform(food_pos_range[0][1], food_pos_range[1][1])
            self.food_pos = food_pos = (rand_x, rand_y)
            torso_x, torso_y = 0, 0
            self._init_torso_x = torso_x
            self.class_type = class_type
            self._init_torso_y = torso_y
            self.food_size = (0.2, 0.4, height)

            ET.SubElement(
                worldbody, "geom",
                name="food",
                pos="%f %f %f" % (food_pos[0],
                                  food_pos[1],
                                  height / 2.),
                size="%f %f %f" % self.food_size,
                type="sphere",
                material="",
                contype="1",
                conaffinity="1",
                density="0.00001",
                rgba="1.0 0. 1. 1"
            )

            torso = tree.find(".//body[@name='torso']")
            geoms = torso.findall(".//geom")
            for geom in geoms:
                if 'name' not in geom.attrib:
                    raise Exception("Every geom of the torso must have a name "
                                    "defined")

            # MuJoCo200 only accepts the "xml" file extension
            _, file_path = tempfile.mkstemp(suffix=".xml", text=True)
            tree.write(file_path)

            # self._goal_range = self._find_goal_range()
            self._cached_segments = None

            class_type.__init__(self, model_path=file_path)
            utils.EzPickle.__init__(self)

        def get_body_xquat(self, body_name):
            idx = self.model.body_names.index(body_name)
            return self.sim.data.body_xquat[idx]

        def reset(self):
            temp = np.copy(self.model.geom_pos)

            rand_x = random.uniform(self.food_pos_range[0][0], self.food_pos_range[1][0])
            rand_y = random.uniform(self.food_pos_range[0][1], self.food_pos_range[1][1])

            # TODO: make this more robust; this hardcodes the second geom
            # as the food, but something more robust would be better.
            assert isclose(temp[1][0], self.food_pos[0])
            assert isclose(temp[1][1], self.food_pos[1])

            self.food_pos = (rand_x, rand_y)
            self.model.geom_pos[1][0] = self.food_pos[0]
            self.model.geom_pos[1][1] = self.food_pos[1]
            ob = super(SingleFoodEnv, self).reset()
            return ob

        def _get_obs(self):
            """
            The observation includes information about the robot itself
            as well as readings from the sensors around it.
            """
            robot_x, robot_y, robot_z = robot_coords = self.get_body_com("torso")
            food_readings = np.zeros(self._n_bins)

            for ray_idx in range(self._n_bins):
                theta = (self._sensor_span / self._n_bins) * ray_idx - self._sensor_span / 2.
                forward_normal = rotate_vector(np.array([1, 0, 0]), [0, 1, 0], theta)
                # Note: MuJoCo quaternions use the [w, x, y, z] convention
                quat_mujoco = self.get_body_xquat("torso")
                quat = [quat_mujoco[1], quat_mujoco[2], quat_mujoco[3], quat_mujoco[0]]
                ray_direction = pyrr.quaternion.apply_to_vector(quat, forward_normal)
                ray = pyrr.ray.create(robot_coords, ray_direction)

                bottom_point = [self.food_pos[0] - self.food_size[0] / 2.,
                                self.food_pos[1] - self.food_size[1] / 2.,
                                0.]
                top_point = [self.food_pos[0] + self.food_size[0] / 2.,
                             self.food_pos[1] + self.food_size[1] / 2.,
                             self.food_size[2]]

                bounding_box = pyrr.aabb.create_from_points([bottom_point, top_point])
                intersection = pyrr.geometric_tests.ray_intersect_aabb(ray, bounding_box)

                if intersection is not None:
                    distance = np.linalg.norm(intersection - robot_coords)
                    if distance <= self._sensor_range:
                        food_readings[ray_idx] = distance / self._sensor_range

            obs = np.concatenate([
                self.class_type._get_obs(self),
                food_readings
            ])

            return obs

        def _is_in_collision(self, pos):
            x, y = pos

            minx = self.food_pos[0] - 0.5 - self._init_torso_x
            maxx = self.food_pos[0] + 0.5 - self._init_torso_x
            miny = self.food_pos[1] - 0.5 - self._init_torso_y
            maxy = self.food_pos[1] + 0.5 - self._init_torso_y
            return minx <= x <= maxx and miny <= y <= maxy

        def _get_food_bonus(self):
            robot_x, robot_y, robot_z = self.get_body_com("torso")
            if self._is_in_collision((robot_x, robot_y)):
                return FOOD_BONUS
            return 0

        def _is_done(self):
            robot_x, robot_y, robot_z = self.get_body_com("torso")
            return self._is_in_collision((robot_x, robot_y))

        def get_xy(self):
            return self.get_body_com("torso")[:2]

        def step(self, action):
            state, reward, done, info = super(SingleFoodEnv, self).step(action)

            next_obs = self._get_obs()
            reward += self._get_food_bonus()  # TODO: revisit the reward function design
            done = self._is_done()

            return next_obs, reward, done, info

        def action_from_key(self, key):
            # Delegate to the wrapped class; calling self.action_from_key
            # here would recurse infinitely.
            return class_type.action_from_key(self, key)

    return SingleFoodEnv
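
A minimal usage sketch of the factory above (it mirrors the `AntSingleFoodEnv` lambda defined in `modified_ant.py` below; the model path assumes gym's bundled MuJoCo assets):

```python
import os
import gym.envs.mujoco
from gym_extensions.continuous.mujoco.food_envs import SingleFoodEnvFactory
from gym_extensions.continuous.mujoco.modified_ant import ModifiedAntEnv

# The factory injects a "food" geom into the model XML and extends the
# observation with the ray-sensor readings computed in _get_obs().
AntFood = SingleFoodEnvFactory(ModifiedAntEnv)
env = AntFood(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml")
obs = env.reset()
```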

24 changes: 11 additions & 13 deletions gym_extensions/continuous/mujoco/gravity_envs.py
@@ -1,15 +1,12 @@
import os.path as osp
import tempfile
import xml.etree.ElementTree as ET
import math

import numpy as np
import gym
import random
import os
from gym import utils
from gym.envs.mujoco import mujoco_env
import mujoco_py

""" As of 18/07/2019
Check this out!
This can be good reference to modify some attributes in `mujoco_py`
https://gist.github.com/machinaut/9bd1473c763554086c176d39062700b0
"""


def GravityEnvFactory(class_type):
    """class_type should be an OpenAI gym type"""

@@ -18,6 +15,7 @@ class GravityEnv(class_type, utils.EzPickle):
        """
        Allows the gravity to be changed via the `gravity` constructor argument.
        """

        def __init__(
                self,
                model_path,
@@ -30,8 +28,8 @@ def __init__(
            # make sure we're using a proper OpenAI gym MuJoCo Env
            assert isinstance(self, mujoco_env.MujocoEnv)

            # Old mujoco-py API (removed):
            #   self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity])
            #   self.model._compute_subtree()
            #   self.model.forward()
            self.model.opt.gravity[0] = 0.
            self.model.opt.gravity[1] = 0.
            self.model.opt.gravity[2] = gravity  # z-component; negative for downward

    return GravityEnv
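
A quick way to sanity-check the gravity override under the new mujoco-py API (a sketch; assumes the `HopperGravityHalf-v1` registration shown in the README example):

```python
import gym
from gym_extensions.continuous import mujoco

env = gym.make("HopperGravityHalf-v1")
# Half of Earth gravity along z; expect roughly [0, 0, -4.905].
print(env.unwrapped.model.opt.gravity)
```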
18 changes: 8 additions & 10 deletions gym_extensions/continuous/mujoco/modified_ant.py
@@ -1,29 +1,27 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
import os.path as osp
from gym_extensions.continuous.mujoco.wall_envs import MazeFactory
from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory

from gym_extensions.continuous.mujoco.food_envs import SingleFoodEnvFactory
from gym.envs.mujoco.ant import AntEnv

import os
import gym

AntGravityEnv = lambda *args, **kwargs: GravityEnvFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs)

AntMaze = lambda *args, **kwargs: MazeFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", ori_ind=0, *args, **kwargs)

AntSingleFoodEnv = lambda *args, **kwargs: SingleFoodEnvFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs)

class ModifiedAntEnv(AntEnv, utils.EzPickle):
    """
    Simply allows changing the XML file; probably unnecessary if we upstream
    the xml name as a kwarg in OpenAI gym.
    """

    def __init__(self, **kwargs):
        mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
        utils.EzPickle.__init__(self)
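
For reference, a direct-instantiation sketch of `ModifiedAntEnv` (the same pattern the lambdas above wrap):

```python
import os
import gym.envs.mujoco

# Point the env at gym's bundled ant model; any compatible XML path works.
env = ModifiedAntEnv(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml")
```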