V0.5.3 release (haosulab#152)

* fix haosulab#142
* docs and add test for sequence rng reproducibility
* add check for nan actions
* Update test_envs.py
* Update test_envs.py

---------

Co-authored-by: Xuanlin (Simon) Li <[email protected]>
anji993 · Sep 21, 2023 · bff9cf1 · bff9cf1
1 parent 5e0f0fe
commit bff9cf1
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 6 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -12,8 +12,8 @@ We recommend using Python 3.9 to build and develop on ManiSkill2 (MS2), although
 conda create -n "ms2_dev" "python==3.9"
 git clone https://github.com/haosulab/ManiSkill2.git
 cd ManiSkill2
-pip install -e .
-pip install pytest coverage
+pip install -e . # install MS2 locally
+pip install pytest coverage stable-baselines3 # add development dependencies for testing purposes
 ```
 
 ## Testing

diff --git a/mani_skill2/agents/base_agent.py b/mani_skill2/agents/base_agent.py
@@ -159,6 +159,7 @@ def reset(self, init_qpos=None):
         self.set_control_mode(self._default_control_mode)
 
     def set_action(self, action):
+        if np.isnan(action).any(): raise ValueError("Action cannot be NaN. Environment received:", action)
         self.controller.set_action(action)
 
     def before_simulation_step(self):

diff --git a/mani_skill2/envs/pick_and_place/stack_cube.py b/mani_skill2/envs/pick_and_place/stack_cube.py
@@ -119,11 +119,11 @@ def _check_cubeA_on_cubeB(self):
     def evaluate(self, **kwargs):
         is_cubeA_on_cubeB = self._check_cubeA_on_cubeB()
         is_cubeA_static = check_actor_static(self.cubeA)
-        is_cubaA_grasped = self.agent.check_grasp(self.cubeA)
-        success = is_cubeA_on_cubeB and is_cubeA_static and (not is_cubaA_grasped)
+        is_cubeA_grasped = self.agent.check_grasp(self.cubeA)
+        success = is_cubeA_on_cubeB and is_cubeA_static and (not is_cubeA_grasped)
 
         return {
-            "is_cubaA_grasped": is_cubaA_grasped,
+            "is_cubaA_grasped": is_cubeA_grasped,
             "is_cubeA_on_cubeB": is_cubeA_on_cubeB,
             "is_cubeA_static": is_cubeA_static,
             # "cubeA_vel": np.linalg.norm(self.cubeA.velocity),

diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py
@@ -472,8 +472,12 @@ def _load_background(self):
     def reset(self, seed=None, options=None):
         if options is None:
             options = dict()
+
+        # When a specific seed is given, we always set the main RNG based on that seed. This deterministically fixes the **sequence** of RNGs
+        # used for each episode on subsequent calls to reset with seed=None. By default, this sequence starts from the default main seed, which is 2022,
+        # so creating an environment and resetting without a seed always yields the same sequence of per-episode RNGs.
         self.set_main_rng(seed)
-        self.set_episode_rng(seed)
+        self.set_episode_rng(seed) # set the first episode's seed up front so environments can use it when reconfiguring with a seed
         self._elapsed_steps = 0
         reconfigure = options.get("reconfigure", False)
         if reconfigure:

diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -51,6 +51,32 @@ def test_env_seeded_reset():
     env.close()
     del env
 
+def test_env_seeded_sequence_reset():
+    N = 17
+    env = gym.make(ENV_IDS[0], max_episode_steps=5)
+    obs, _ = env.reset(seed=2000)
+    actions = [env.action_space.sample() for _ in range(N)]
+    for i in range(N):
+        first_obs, _, _, truncated, _ = env.step(actions[i])
+        if truncated:
+            first_obs, _ = env.reset()
+    obs, _ = env.reset(seed=2000)
+    for i in range(N):
+        obs, _, _, truncated, _ = env.step(actions[i])
+        if truncated:
+            obs, _ = env.reset()
+    env.close()
+    assert_obs_equal(obs, first_obs)
+    del env
+
+def test_env_raise_value_error_for_nan_actions():
+    env = gym.make(ENV_IDS[0])
+    obs, _ = env.reset(seed=2000)
+    with pytest.raises(ValueError):
+        env.step(env.action_space.sample() * np.nan)
+    env.close()
+    del env
+
 
 @pytest.mark.parametrize("env_id", ENV_IDS)
 def test_states(env_id):