diff --git a/myGym/configs/pnr_medium_multi2.json b/myGym/configs/pnr_medium_multi2.json
new file mode 100644
index 00000000..c40c0acf
--- /dev/null
+++ b/myGym/configs/pnr_medium_multi2.json
@@ -0,0 +1,55 @@
+{
+#Environment
+"env_name" :"Gym-v0",
+"workspace" :"table",
+"engine" :"pybullet",
+"render" :"opengl",
+"camera" :0,
+"gui" :0,
+"visualize" :0,
+"visgym" :0,
+#Robot
+"robot" :"panda",
+"robot_action" :"joints",
+"robot_init" :[-0.4, 0.6, 0.5],
+"max_velocity" :3,
+"max_force" :100,
+"action_repeat" :1,
+#Task
+"task_type" :"pnprot",
+"task_objects" :[{"init":{"obj_name":"bar","fixed":0,"rand_rot":0, "sampling_area":[-0.5,0.0,0.35,0.65,0.1,0.1]},
+                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.5,0.0,0.35,0.65,0.3,0.3]}},
+                 {"init":{"obj_name":"bar","fixed":0,"rand_rot":0, "sampling_area":[0.1,0.5,0.35,0.65,0.1,0.1]},
+                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[0.1,0.5,0.35,0.65,0.3,0.3]}}],
+"color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
+"used_objects" :{"num_range":[0,0], "obj_list":[]},
+// Observation
+// actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
+// goal_state options (pick one): "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
+// additional_obs options (pick none/one/more): "joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"
+"observation" : {"actual_state":"obj_6D", "goal_state":"obj_6D", "additional_obs":["endeff_xyz"]},
+#Distractor
+"distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3,
+                 "movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]},
+#Reward
+"reward" :"pnprot",
+"distance_type" :"euclidean",
+"vae_path" :null,
+"yolact_path" :null,
+"yolact_config" :null,
+#Train
+"train_framework" :"tensorflow",
+"algo" :"multi",
+"num_networks" :3,
+"max_episode_steps" :1024,
+"algo_steps" :1024,
+"steps" :15000000,
+"pretrained_model" :null,
+"multiprocessing" :false,
+#Evaluation
+"eval_freq" :1000000,
+"eval_episodes" :50,
+#Saving and logging
+"logdir" :"trained_models/pnr_medium",
+"record" :0
+}
diff --git a/myGym/configs/train_swipe.json b/myGym/configs/train_swipe.json
index c75907c9..f6807a1a 100644
--- a/myGym/configs/train_swipe.json
+++ b/myGym/configs/train_swipe.json
@@ -17,8 +17,8 @@
 "action_repeat" :1,
 #Task
 "task_type" :"pnpswipe",
-"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.5,0.4,0.5,0.08,0.08]},
-                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.1,0.4,0.5,0.15,0.15]}}],
+"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.6,0.4,0.6,0.08,0.08]},
+                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.2,0.4,0.6,0.15,0.15]}}],
 "color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
 "used_objects" :{"num_range":[0,0], "obj_list":[]},
 // Observation
@@ -41,11 +41,11 @@
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
-"steps" :10000000,
+"steps" :15000000,
 "pretrained_model" :null,
 "multiprocessing" :false,
 #Evaluation
-"eval_freq" :500000,
+"eval_freq" :1000000,
 "eval_episodes" :50,
 #Saving and logging
 "logdir" :"trained_models/debug",
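Note on the `sampling_area` values edited above: they appear to follow the `[x_min, x_max, y_min, y_max, z_min, z_max]` layout used for randomized object placement, so the train_swipe.json change simply widens the sponge init and goal regions along x and y. A minimal sketch of sampling a position from such an area (the helper name and the assumed layout are illustrative, not code from this diff):

```python
import random

def sample_position(area):
    # Assumed layout: [x_min, x_max, y_min, y_max, z_min, z_max];
    # bounds may be given in either order, so sort each pair first.
    (x0, x1), (y0, y1), (z0, z1) = (sorted(area[i:i + 2]) for i in (0, 2, 4))
    return [random.uniform(x0, x1), random.uniform(y0, y1), random.uniform(z0, z1)]

# Example: the widened sponge init area from train_swipe.json
print(sample_position([-0.4, -0.6, 0.4, 0.6, 0.08, 0.08]))
```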
diff --git a/myGym/configs/train_swipe_multi2.json b/myGym/configs/train_swipe_multi2.json
new file mode 100644
index 00000000..de88210d
--- /dev/null
+++ b/myGym/configs/train_swipe_multi2.json
@@ -0,0 +1,55 @@
+{
+#Environment
+"env_name" :"Gym-v0",
+"workspace" :"table",
+"engine" :"pybullet",
+"render" :"opengl",
+"camera" :0,
+"gui" :0,
+"visualize" :0,
+"visgym" :0,
+#Robot
+"robot" :"kuka",
+"robot_action" :"joints",
+"robot_init" :[-0.4, 0.6, 0.5],
+"max_velocity" :3,
+"max_force" :100,
+"action_repeat" :1,
+#Task
+"task_type" :"pnpswipe",
+"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.6,0.4,0.6,0.08,0.08]},
+                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.2,0.4,0.6,0.15,0.15]}},
+                 {"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[0.1,0.2,0.4,0.6,0.08,0.08]},
+                 "goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[0.4,0.5,0.4,0.6,0.15,0.15]}}],
+"color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
+"used_objects" :{"num_range":[0,0], "obj_list":[]},
+// Observation
+// actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
+// goal_state options (pick one): "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
+// additional_obs options (pick none/one/more): "joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"
+"observation" : {"actual_state":"obj_6D", "goal_state":"obj_6D", "additional_obs":["endeff_xyz"]},
+#Distractor
+"distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3,
+                 "movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]},
+#Reward
+"reward" :"pnpswipe",
+"distance_type" :"euclidean",
+"vae_path" :null,
+"yolact_path" :null,
+"yolact_config" :null,
+#Train
+"train_framework" :"tensorflow",
+"algo" :"multi",
+"num_networks" :3,
+"max_episode_steps" :1024,
+"algo_steps" :1024,
+"steps" :15000000,
+"pretrained_model" :null,
+"multiprocessing" :false,
+#Evaluation
+"eval_freq" :1000000,
+"eval_episodes" :50,
+#Saving and logging
+"logdir" :"trained_models/debug",
+"record" :0
+}
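The two new configs keep the existing `#` and `//` comment lines, which strict JSON parsers reject; myGym's own loader is expected to tolerate them. Purely as an illustration (not the project's actual loader), a stdlib-only sketch that reads such a file, assuming comments occupy whole lines as they do here:

```python
import json

def load_commented_json(path):
    # Drop whole-line "#..." and "//..." comments before parsing;
    # inline comments are not handled because these configs do not use them.
    with open(path) as f:
        lines = [ln for ln in f if not ln.lstrip().startswith(("#", "//"))]
    return json.loads("".join(lines))

cfg = load_commented_json("myGym/configs/train_swipe_multi2.json")
print(cfg["task_type"], cfg["num_networks"], cfg["steps"])
```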
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125]) return reward def move_compute(self, object, goal): @@ -1329,7 +1329,7 @@ def move_compute(self, object, goal): self.env.episode_over = True ix = 1 if self.num_networks > 1 else 0 self.network_rewards[ix] += reward - self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125]) return reward class ThreeStagePnP(TwoStagePnP): @@ -1382,12 +1382,12 @@ def decide(self, observation=None): def move_compute(self, object, goal): # moving object above goal position (forced 2D reach) - self.env.p.addUserDebugText("move", [0.7,0.7,0.7], lifeTime=0.1, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText("Subgoal:move", [.65, 1., 0.5], lifeTime=0.1, textColorRGB=[0,0,125]) object_XY = object[:3] goal_XY = [goal[0], goal[1], goal[2]+0.2] self.env.p.addUserDebugLine(object_XY, goal_XY, lifeTime=0.1) dist = self.task.calc_distance(object_XY, goal_XY) - self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.5,0.5,0.7], lifeTime=0.1, textColorRGB=[0, 125, 0]) + self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.1, textColorRGB=[0, 125, 0]) if self.last_move_dist is None or self.last_owner != 1: self.last_move_dist = dist reward = self.last_move_dist - dist @@ -1395,16 +1395,16 @@ def move_compute(self, object, goal): self.last_move_dist = dist ix = 1 if self.num_networks > 1 else 0 self.network_rewards[ix] += reward - self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125]) return reward def place_compute(self, object, goal): # reach of goal position + task object height in Z axis and release - self.env.p.addUserDebugText("place", [0.7,0.7,0.7], lifeTime=0.1, textColorRGB=[125,125,0]) + self.env.p.addUserDebugText("Subgoal: place", [.65, 1., 0.5], lifeTime=0.1, textColorRGB=[125,125,0]) self.env.p.addUserDebugLine(object[:3], goal[:3], lifeTime=0.1) dist = self.task.calc_distance(object, goal) - self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.5,0.5,0.7], lifeTime=0.1, textColorRGB=[0, 125, 0]) + self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.1, textColorRGB=[0, 125, 0]) if self.last_place_dist is None or self.last_owner != 2: self.last_place_dist = dist reward = self.last_place_dist - dist @@ -1422,7 +1422,7 @@ def place_compute(self, object, goal): self.env.episode_info = "Task finished in initial configuration" self.env.episode_over = True self.network_rewards[-1] += reward - self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125]) return reward def gripper_reached_object(self, gripper, object): @@ -1510,7 +1510,7 @@ def find_compute(self, gripper, object): self.env.p.addUserDebugText("find object", [0.63,1,0.5], lifeTime=0.5, textColorRGB=[125,0,0]) dist = self.task.calc_distance(gripper[:3], object[:3]) 
         self.env.p.addUserDebugLine(gripper[:3], object[:3], lifeTime=0.1)
-        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
+        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
         if self.last_find_dist is None:
             self.last_find_dist = dist
         if self.last_owner != 0:
@@ -1522,7 +1522,7 @@ def find_compute(self, gripper, object):
             # self.env.episode_over = True
             # self.env.episode_failed = True
         self.network_rewards[0] += reward
-        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
+        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
         return reward
 
     def move_compute(self, object, goal):
@@ -1532,7 +1532,7 @@ def move_compute(self, object, goal):
         goal_XY = goal[:3]
         self.env.p.addUserDebugLine(object_XY, goal_XY, lifeTime=0.1)
         dist = self.task.calc_distance(object_XY, goal_XY)
-        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
+        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
         if self.last_move_dist is None or self.last_owner != 1:
             self.last_move_dist = dist
         reward = self.last_move_dist - dist
@@ -1540,7 +1540,7 @@ def move_compute(self, object, goal):
             self.last_move_dist = dist
         ix = 1 if self.num_networks > 1 else 0
         self.network_rewards[ix] += reward
-        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
+        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
         return reward
 
 
@@ -1549,13 +1549,13 @@ def rotate_compute(self, object, goal):
         self.env.p.addUserDebugText("rotate", [0.63,1,0.5], lifeTime=0.1, textColorRGB=[125,125,0])
         self.env.p.addUserDebugLine(object[:3], goal[:3], lifeTime=0.1)
         dist = self.task.calc_distance(object, goal)
-        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
+        self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
         if self.last_place_dist is None or self.last_owner != 2:
             self.last_place_dist = dist
         reward = self.last_place_dist - dist
 
         rot = self.task.calc_rot_quat(object, goal)
-        self.env.p.addUserDebugText("Rotation: {}".format(round(rot,3)), [0.57,1,0.7], lifeTime=0.5, textColorRGB=[0, 222, 100])
+        self.env.p.addUserDebugText("Rotation: {}".format(round(rot,3)), [0.65,1,0.6], lifeTime=0.5, textColorRGB=[0, 222, 100])
         if self.last_rot_dist is None or self.last_owner != 2:
             self.last_rot_dist = rot
         rewardrot = self.last_rot_dist - rot
@@ -1574,7 +1574,7 @@ def rotate_compute(self, object, goal):
             # self.env.episode_info = "Task finished in initial configuration"
             # self.env.episode_over = True
         self.network_rewards[-1] += reward
-        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
+        self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
         return reward
 
     def object_near_goal(self, object, goal):
@@ -1671,7 +1671,7 @@ def move_compute(self, object, pregoal):
             self.last_place_dist = dist
         ix = 1 if self.num_networks > 1 else 0
         self.network_rewards[ix] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125]) return reward @@ -1710,7 +1710,7 @@ def rotate_compute(self, object, goal): # self.env.episode_info = "Task finished in initial configuration" # self.env.episode_over = True self.network_rewards[2] += reward - self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125]) + self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125]) return reward def object_ready_swipe(self, object, goal):