include epsilon-greedy

cc299792458 · Oct 11, 2024 · e9f156b
1 parent 030b8a5
commit e9f156b
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 6 deletions.
diff --git a/tossingbot/envs/pybullet/tasks/toss_objects.py b/tossingbot/envs/pybullet/tasks/toss_objects.py
@@ -442,7 +442,8 @@ def pre_control_step(self):
             self.grasp_completed = self.robot.grasp(tcp_target_pose=self.grasp_pose, post_grasp_pose=self.post_grasp_pose)
             if self.grasp_completed:
                 self.check_grasp_success()
-                self.consecutive_grasp_failures = self.consecutive_grasp_failures + 1 if not self.grasp_success else 0
+                if self.task_config['use_heuristic']:
+                    self.consecutive_grasp_failures = self.consecutive_grasp_failures + 1 if not self.grasp_success else 0
         elif not self.grasp_success:
             is_action_finished = True
         elif not self.throw_completed:

diff --git a/tossingbot/experiments/toss_objects/test_physics_agent.py b/tossingbot/experiments/toss_objects/test_physics_agent.py
@@ -97,4 +97,4 @@
 
     # Print final results
     print(f"Average Grasp Success Rate: {avg_grasp_success:.3f}")   # Average Grasp Success Rate: 1.000
-    print(f"Average Throw Success Rate (for successful grasps): {avg_throw_success:.3f}")   # Average Throw Success Rate (for successful grasps): 1.000
+    print(f"Average Throw Success Rate (for successful grasps): {avg_throw_success:.3f}")   # Average Throw Success Rate (for successful grasps): 0.990
diff --git a/tossingbot/experiments/toss_objects/test_residual_physics_agent.py b/tossingbot/experiments/toss_objects/test_residual_physics_agent.py
@@ -96,6 +96,6 @@
     avg_grasp_success = np.mean(grasp_success_history)
     avg_throw_success = np.mean(throw_success_history) if throw_success_history else 0.0
 
-    # Grasp Success: 1.000 | Throw Success: 0.960 for 2000 steps, 0.930 for 1000 steps, 0.869 for 300 steps 
+    # Grasp Success: 1.000 | Throw Success: 0.990
     print(f"Average Grasp Success Rate: {avg_grasp_success:.3f}")
     print(f"Average Throw Success Rate (for successful grasps): {avg_throw_success:.3f}")
diff --git a/tossingbot/experiments/toss_objects/train_physics_agent.py b/tossingbot/experiments/toss_objects/train_physics_agent.py
@@ -66,11 +66,12 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
     use_gui = False
     box_length = 0.15
     box_n_rows, box_n_cols = 3, 3
+    use_heuristic = False
 
     n_rotations = 1
     phi_deg = 45
 
-    total_episodes = 100
+    total_episodes = 200
 
     # Lists to track the cumulative success rates over all episodes
     avg_grasp_success_history = []
@@ -88,6 +89,9 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
             'box_n_cols': box_n_cols,
             'box_length': box_length,
         },
+        task_config={
+            'use_heuristic': use_heuristic,
+        },
         objects_config={"object_types": ['ball', 'cube']},
         camera_config={'n_rotations': n_rotations},
     )
@@ -114,7 +118,8 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
         grasping_module=grasping_module, 
         throwing_module=throwing_module,
         physics_controller=physics_controller,
-        epsilons=[0.0, 0.0]  # Disable epsilon-greedy for this test
+        epsilons=[0.5, 0.1],
+        total_episodes=total_episodes,
     )
 
     # Optimizer

diff --git a/tossingbot/experiments/toss_objects/train_residual_physics_agent.py b/tossingbot/experiments/toss_objects/train_residual_physics_agent.py
@@ -65,6 +65,7 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
     use_gui = False
     box_length = 0.15
     box_n_rows, box_n_cols = 3, 3
+    use_heuristic = False
 
     n_rotations = 1
     phi_deg = 45
@@ -87,6 +88,9 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
             'box_n_cols': box_n_cols,
             'box_length': box_length,
         },
+        task_config={
+            'use_heuristic': use_heuristic,
+        },
         objects_config={"object_types": ['ball', 'cube']},
         camera_config={'n_rotations': n_rotations}
     )
@@ -113,7 +117,8 @@ def plot_success_rates(avg_grasp_success_history, avg_throw_success_history, log
         grasping_module=grasping_module, 
         throwing_module=throwing_module,
         physics_controller=physics_controller,
-        epsilons=[0.0, 0.0] # Disable epsilon-greedy for this test
+        epsilons=[0.5, 0.1],
+        total_episodes=total_episodes,
     )
 
     # Optimizer