Modified MuJoCo env to be compatible with MuJoCo200 #11

Open · wants to merge 8 commits into base: `master`
3 changes: 3 additions & 0 deletions .gitignore
@@ -103,3 +103,6 @@ ENV/
.mypy_cache/

data/
/.pytest_cache/

.idea
39 changes: 36 additions & 3 deletions README.md
@@ -6,7 +6,7 @@ This python package is an extension to OpenAI Gym for auxiliary tasks (multitask

## Dependencies

- [Python 3.5.2](https://www.python.org/) or [Python 3.6.x](https://www.python.org/)
- [OpenAI Gym](https://gym.openai.com/)
- [MuJoCo](http://mujoco.org/) (Optional)
- [mujoco-py](https://github.com/openai/mujoco-py#install-mujoco) (Optional)
@@ -36,9 +36,20 @@ at `~/.mujoco/mjkey.txt`
pip3 install -e .[mujoco]
```

#### MuJoCo Env Naming Rule
See the official [mujoco_py](https://github.com/openai/mujoco-py) repository for details; a short sketch follows the list.
- `env_name-v0`: MuJoCo version < 2.0
- `env_name-v1`: MuJoCo version == 2.0
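
For example (a sketch; `HopperGravityHalf` is one of the gravity variants this package registers, used again in the example section below):

```python
import gym
from gym_extensions.continuous import mujoco

# env = gym.make("HopperGravityHalf-v0")  # mujoco-py builds for MuJoCo < 2.0
env = gym.make("HopperGravityHalf-v1")    # mujoco-py builds for MuJoCo 2.0
```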

### Test Installation
There are two ways to test the installation:
```
# 1. Run the full test suite
$ nosetests -v gym_extensions

# 2. Run the MuJoCo test
$ cd tests
$ python simple_test.py
```

### Possible Issues
@@ -75,10 +86,32 @@ env = gym.make("State-Based-Navigation-2d-Map1-Goal1-v0")
```

```python
import gym
from gym_extensions.continuous import mujoco

# env = gym.make("HopperGravityHalf-v0")  # MuJoCo before ver. 2.0
env = gym.make("HopperGravityHalf-v1")    # MuJoCo ver. 2.0

env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample())  # take a random action
env.close()
```

```python
import gym
import gym_extensions.continuous.mujoco.nervenet_envs.register as register

env = gym.make("WalkersHopperthree-v1")
env.reset()

for _ in range(100 * 3):
    env.render()
    action = env.action_space.sample()
    state, reward, done, info = env.step(action)
env.close()
```

## More info

725 changes: 368 additions & 357 deletions gym_extensions/continuous/mujoco/__init__.py

Large diffs are not rendered by default.

194 changes: 194 additions & 0 deletions gym_extensions/continuous/mujoco/food_envs.py
@@ -0,0 +1,194 @@
import tempfile
import xml.etree.ElementTree as ET
import numpy as np
import random
from gym import utils
import math
import pyrr
from gym_extensions.continuous.mujoco.wall_envs import isclose, rotate_vector

FOOD_BONUS = 100


def SingleFoodEnvFactory(class_type):
    """class_type should be an OpenAI gym type"""

    class SingleFoodEnv(class_type, utils.EzPickle):
        """
        Provides a piece of frictionless food which the agent must seek out in the env.
        A visual observation could be used instead of the installed sensors.
        """

        def __init__(
                self,
                model_path,
                food_height=.25,
                food_pos_range=([1.5, 1.5], [2.5, 2.5]),
                # food_pos_range=([0.5, 0.5], [0.8, 0.8]),  # test purposes
                n_bins=10,
                sensor_range=10.,
                sensor_span=math.pi / 2,
                *args,
                **kwargs):

            # TODO: temporary workaround: OpenAI gym seems to require this,
            # but there may be a better way to specify it.
            self._seed = 0

            self._n_bins = n_bins
            # Add a sensor
            self._sensor_range = sensor_range
            self._sensor_span = sensor_span

            tree = ET.parse(model_path)
            worldbody = tree.find(".//worldbody")

            height = food_height
            self.food_pos_range = food_pos_range
            rand_x = random.uniform(food_pos_range[0][0], food_pos_range[1][0])
            rand_y = random.uniform(food_pos_range[0][1], food_pos_range[1][1])
            self.food_pos = food_pos = (rand_x, rand_y)
            torso_x, torso_y = 0, 0
            self._init_torso_x = torso_x
            self.class_type = class_type
            self._init_torso_y = torso_y
            self.food_size = (0.2, 0.4, height)

            ET.SubElement(
                worldbody, "geom",
                name="food",
                pos="%f %f %f" % (food_pos[0],
                                  food_pos[1],
                                  height / 2.),
                size="%f %f %f" % self.food_size,
                type="sphere",
                material="",
                contype="1",
                conaffinity="1",
                density="0.00001",
                rgba="1.0 0. 1. 1"
            )

            torso = tree.find(".//body[@name='torso']")
            geoms = torso.findall(".//geom")
            for geom in geoms:
                if 'name' not in geom.attrib:
                    raise Exception("Every geom of the torso must have a name "
                                    "defined")

            # MuJoCo200 only accepts the "xml" file extension
            _, file_path = tempfile.mkstemp(suffix=".xml", text=True)
            tree.write(file_path)

            # self._goal_range = self._find_goal_range()
            self._cached_segments = None

            class_type.__init__(self, model_path=file_path)
            utils.EzPickle.__init__(self)

        def get_body_xquat(self, body_name):
            idx = self.model.body_names.index(body_name)
            return self.sim.data.body_xquat[idx]

        def reset(self):
            temp = np.copy(self.model.geom_pos)

            rand_x = random.uniform(self.food_pos_range[0][0], self.food_pos_range[1][0])
            rand_y = random.uniform(self.food_pos_range[0][1], self.food_pos_range[1][1])

            # TODO: make this more robust; this hardcodes the second geom
            # as the food, but something more robust would be better.
            assert isclose(temp[1][0], self.food_pos[0])
            assert isclose(temp[1][1], self.food_pos[1])

            self.food_pos = (rand_x, rand_y)
            self.model.geom_pos[1][0] = self.food_pos[0]
            self.model.geom_pos[1][1] = self.food_pos[1]
            ob = super(SingleFoodEnv, self).reset()
            return ob

        def _get_obs(self):
            """
            The observation includes information about the robot itself
            as well as readings from the sensors around it.
            """
            robot_x, robot_y, robot_z = robot_coords = self.get_body_com("torso")
            food_readings = np.zeros(self._n_bins)

            for ray_idx in range(self._n_bins):
                theta = (self._sensor_span / self._n_bins) * ray_idx - self._sensor_span / 2.
                forward_normal = rotate_vector(np.array([1, 0, 0]), [0, 1, 0], theta)
                # Note: MuJoCo quaternions use the [w, x, y, z] convention
                quat_mujoco = self.get_body_xquat("torso")
                quat = [quat_mujoco[1], quat_mujoco[2], quat_mujoco[3], quat_mujoco[0]]
                ray_direction = pyrr.quaternion.apply_to_vector(quat, forward_normal)
                ray = pyrr.ray.create(robot_coords, ray_direction)

                bottom_point = [self.food_pos[0] - self.food_size[0] / 2.,
                                self.food_pos[1] - self.food_size[1] / 2.,
                                0.]
                top_point = [self.food_pos[0] + self.food_size[0] / 2.,
                             self.food_pos[1] + self.food_size[1] / 2.,
                             self.food_size[2]]

                bounding_box = pyrr.aabb.create_from_points([bottom_point, top_point])
                intersection = pyrr.geometric_tests.ray_intersect_aabb(ray, bounding_box)

                if intersection is not None:
                    distance = np.linalg.norm(intersection - robot_coords)
                    if distance <= self._sensor_range:
                        food_readings[ray_idx] = distance / self._sensor_range

            obs = np.concatenate([
                self.class_type._get_obs(self),
                food_readings
            ])

            return obs

        def _is_in_collision(self, pos):
            x, y = pos

            minx = self.food_pos[0] - 0.5 - self._init_torso_x
            maxx = self.food_pos[0] + 0.5 - self._init_torso_x
            miny = self.food_pos[1] - 0.5 - self._init_torso_y
            maxy = self.food_pos[1] + 0.5 - self._init_torso_y
            return minx <= x <= maxx and miny <= y <= maxy

        def _get_food_bonus(self):
            robot_x, robot_y, robot_z = self.get_body_com("torso")
            if self._is_in_collision((robot_x, robot_y)):
                return FOOD_BONUS
            return 0

        def _is_done(self):
            robot_x, robot_y, robot_z = self.get_body_com("torso")
            return self._is_in_collision((robot_x, robot_y))

        def get_xy(self):
            return self.get_body_com("torso")[:2]

        def step(self, action):
            state, reward, done, info = super(SingleFoodEnv, self).step(action)

            next_obs = self._get_obs()
            reward += self._get_food_bonus()  # TODO: revisit the reward function design
            done = self._is_done()

            return next_obs, reward, done, info

        def action_from_key(self, key):
            # Delegate to the wrapped class; calling self.action_from_key
            # here would recurse infinitely.
            return class_type.action_from_key(self, key)

    return SingleFoodEnv
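
A minimal usage sketch of the factory above (it mirrors the `AntSingleFoodEnv` lambda defined in `modified_ant.py` below; the model path assumes gym's bundled MuJoCo assets):

```python
import os
import gym.envs.mujoco
from gym_extensions.continuous.mujoco.food_envs import SingleFoodEnvFactory
from gym_extensions.continuous.mujoco.modified_ant import ModifiedAntEnv

# The factory injects a "food" geom into the model XML and extends the
# observation with the ray-sensor readings computed in _get_obs().
AntFood = SingleFoodEnvFactory(ModifiedAntEnv)
env = AntFood(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml")
obs = env.reset()
```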

24 changes: 11 additions & 13 deletions gym_extensions/continuous/mujoco/gravity_envs.py
@@ -1,15 +1,12 @@
import os.path as osp
import tempfile
import xml.etree.ElementTree as ET
import math

import numpy as np
import gym
import random
import os
from gym import utils
from gym.envs.mujoco import mujoco_env
import mujoco_py

""" As of 18/07/2019
Check this out!
This can be good reference to modify some attributes in `mujoco_py`
https://gist.github.com/machinaut/9bd1473c763554086c176d39062700b0
"""


def GravityEnvFactory(class_type):
    """class_type should be an OpenAI gym type"""

@@ -18,6 +15,7 @@ class GravityEnv(class_type, utils.EzPickle):
        """
        Allows the gravity to be changed via the `gravity` constructor argument.
        """

        def __init__(
                self,
                model_path,
@@ -30,8 +28,8 @@ def __init__(
            # make sure we're using a proper OpenAI gym MuJoCo Env
            assert isinstance(self, mujoco_env.MujocoEnv)

            # Old mujoco-py API (removed):
            #   self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity])
            #   self.model._compute_subtree()
            #   self.model.forward()
            self.model.opt.gravity[0] = 0.
            self.model.opt.gravity[1] = 0.
            self.model.opt.gravity[2] = gravity  # z-component; negative for downward

    return GravityEnv
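
A quick way to sanity-check the gravity override under the new mujoco-py API (a sketch; assumes the `HopperGravityHalf-v1` registration shown in the README example):

```python
import gym
from gym_extensions.continuous import mujoco

env = gym.make("HopperGravityHalf-v1")
# Half of Earth gravity along z; expect roughly [0, 0, -4.905].
print(env.unwrapped.model.opt.gravity)
```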
18 changes: 8 additions & 10 deletions gym_extensions/continuous/mujoco/modified_ant.py
@@ -1,29 +1,27 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
import os.path as osp
from gym_extensions.continuous.mujoco.wall_envs import MazeFactory
from gym_extensions.continuous.mujoco.gravity_envs import GravityEnvFactory
from gym_extensions.continuous.mujoco.perturbed_bodypart_env import ModifiedSizeEnvFactory

from gym_extensions.continuous.mujoco.food_envs import SingleFoodEnvFactory
from gym.envs.mujoco.ant import AntEnv

import os
import gym

AntGravityEnv = lambda *args, **kwargs: GravityEnvFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs)

AntMaze = lambda *args, **kwargs: MazeFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", ori_ind=0, *args, **kwargs)

AntSingleFoodEnv = lambda *args, **kwargs: SingleFoodEnvFactory(ModifiedAntEnv)(
    model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml", *args, **kwargs)

class ModifiedAntEnv(AntEnv, utils.EzPickle):
    """
    Simply allows changing the XML file; probably unnecessary if we upstream
    the xml name as a kwarg in OpenAI gym.
    """

    def __init__(self, **kwargs):
        mujoco_env.MujocoEnv.__init__(self, kwargs["model_path"], 4)
        utils.EzPickle.__init__(self)
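
For reference, a direct-instantiation sketch of `ModifiedAntEnv` (the same pattern the lambdas above wrap):

```python
import os
import gym.envs.mujoco

# Point the env at gym's bundled ant model; any compatible XML path works.
env = ModifiedAntEnv(model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/ant.xml")
```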