Commit

config update - swipe and pnr
michalvavrecka committed Jan 21, 2023
1 parent 55fa5b4 commit ee12746
Showing 4 changed files with 134 additions and 24 deletions.
55 changes: 55 additions & 0 deletions myGym/configs/pnr_medium_multi2.json
@@ -0,0 +1,55 @@
{
#Environment
"env_name" :"Gym-v0",
"workspace" :"table",
"engine" :"pybullet",
"render" :"opengl",
"camera" :0,
"gui" :0,
"visualize" :0,
"visgym" :0,
#Robot
"robot" :"panda",
"robot_action" :"joints",
"robot_init" :[-0.4, 0.6, 0.5],
"max_velocity" :3,
"max_force" :100,
"action_repeat" :1,
#Task
"task_type" :"pnprot",
"task_objects" :[{"init":{"obj_name":"bar","fixed":0,"rand_rot":0, "sampling_area":[-0.5,0.0,0.35,0.65,0.1,0.1]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.5,0.0,0.35,0.65,0.3,0.3]}},
{"init":{"obj_name":"bar","fixed":0,"rand_rot":0, "sampling_area":[0.1,0.5,0.35,0.65,0.1,0.1]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[0.1,0.5,0.35,0.65,0.3,0.3]}}],
"color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
"used_objects" :{"num_range":[0,0], "obj_list":[]},
// Observation
// actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
// goal_state options (pick one): "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
// additional_obs options (pick none/one/more): "joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"
"observation" : {"actual_state":"obj_6D", "goal_state":"obj_6D", "additional_obs":["endeff_xyz"]},
#Distractor
"distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3,
"movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]},
#Reward
"reward" :"pnprot",
"distance_type" :"euclidean",
"vae_path" :null,
"yolact_path" :null,
"yolact_config" :null,
#Train
"train_framework" :"tensorflow",
"algo" :"multi",
"num_networks" :3,
"max_episode_steps" :1024,
"algo_steps" :1024,
"steps" :15000000,
"pretrained_model" :null,
"multiprocessing" :false,
#Evaluation
"eval_freq" :1000000,
"eval_episodes" :50,
#Saving and logging
"logdir" :"trained_models/pnr_medium",
"record" :0
}
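The config above is JSON with `#` section headers and `//` comment lines, which the standard-library `json` parser will not accept; myGym presumably strips or tolerates these when loading (for example via a comment-aware JSON package). As a reference point, here is a minimal hand-rolled sketch of loading such a file — the helper name and the assumption that comments only ever occupy whole lines are mine, not the repository's:

```python
import json
from pathlib import Path

def load_commented_json(path):
    """Load a JSON config that uses '#' and '//' whole-line comments.

    Assumption: comments always start a line (after optional whitespace)
    and never appear inside string values, which holds for the configs
    added in this commit.
    """
    kept = []
    for line in Path(path).read_text().splitlines():
        stripped = line.lstrip()
        if stripped.startswith("#") or stripped.startswith("//"):
            continue  # drop the whole comment line
        kept.append(line)
    return json.loads("\n".join(kept))

# e.g. cfg = load_commented_json("myGym/configs/pnr_medium_multi2.json")
#      print(cfg["task_type"], cfg["num_networks"])  # -> pnprot 3
```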
8 changes: 4 additions & 4 deletions myGym/configs/train_swipe.json
@@ -17,8 +17,8 @@
"action_repeat" :1,
#Task
"task_type" :"pnpswipe",
"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.5,0.4,0.5,0.08,0.08]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.1,0.4,0.5,0.15,0.15]}}],
"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.6,0.4,0.6,0.08,0.08]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.2,0.4,0.6,0.15,0.15]}}],
"color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
"used_objects" :{"num_range":[0,0], "obj_list":[]},
// Observation
@@ -41,11 +41,11 @@
"num_networks" :3,
"max_episode_steps" :512,
"algo_steps" :512,
"steps" :10000000,
"steps" :15000000,
"pretrained_model" :null,
"multiprocessing" :false,
#Evaluation
"eval_freq" :500000,
"eval_freq" :1000000,
"eval_episodes" :50,
#Saving and logging
"logdir" :"trained_models/debug",
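The substantive change here widens the sponge's `sampling_area` and lengthens training (`steps` 10M → 15M, `eval_freq` 0.5M → 1M). Each `sampling_area` is six numbers, which read as [x_min, x_max, y_min, y_max, z_min, z_max], with a degenerate pair (for example 0.08, 0.08) pinning that axis; that ordering is inferred from the values in the diff, not stated anywhere in it. A small sketch of drawing an init position from such an area:

```python
import random

def sample_position(area):
    """Draw (x, y, z) from a sampling_area assumed to be
    [x_min, x_max, y_min, y_max, z_min, z_max]."""
    # random.uniform accepts reversed bounds such as (-0.4, -0.6),
    # so the swapped pairs used in these configs still work.
    return (random.uniform(area[0], area[1]),
            random.uniform(area[2], area[3]),
            random.uniform(area[4], area[5]))

# Sponge init area from the updated train_swipe.json:
print(sample_position([-0.4, -0.6, 0.4, 0.6, 0.08, 0.08]))
```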
55 changes: 55 additions & 0 deletions myGym/configs/train_swipe_multi2.json
@@ -0,0 +1,55 @@
{
#Environment
"env_name" :"Gym-v0",
"workspace" :"table",
"engine" :"pybullet",
"render" :"opengl",
"camera" :0,
"gui" :0,
"visualize" :0,
"visgym" :0,
#Robot
"robot" :"kuka",
"robot_action" :"joints",
"robot_init" :[-0.4, 0.6, 0.5],
"max_velocity" :3,
"max_force" :100,
"action_repeat" :1,
#Task
"task_type" :"pnpswipe",
"task_objects" :[{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[-0.4,-0.6,0.4,0.6,0.08,0.08]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[-0.0,-0.2,0.4,0.6,0.15,0.15]}},
{"init":{"obj_name":"sponge","fixed":0,"rand_rot":0, "sampling_area":[0.1,0.2,0.4,0.6,0.08,0.08]},
"goal":{"obj_name":"bar_target","fixed":1,"rand_rot":1, "sampling_area":[0.4,0.5,0.4,0.6,0.15,0.15]}}],
"color_dict" : {"cube_holes":[[0.2,0.5,0.2,1]], "target":[[0.3,0.3,0.3,1]]},
"used_objects" :{"num_range":[0,0], "obj_list":[]},
// Observation
// actual_state options (pick one): "endeff_xyz", "endeff_6D" (robot end effector), "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
// goal_state options (pick one): "obj_xyz", "obj_6D", "vae", "yolact", "voxel" or "dope"
// additional_obs options (pick none/one/more): "joints_xyz", "joints_angles", "endeff_xyz", "endeff_6D", "touch", "distractor"
"observation" : {"actual_state":"obj_6D", "goal_state":"obj_6D", "additional_obs":["endeff_xyz"]},
#Distractor
"distractors" : {"list":null, "moveable":1, "constant_speed":0, "movement_dims":3,
"movement_endpoints":[-0.3, 0.3, 0.4, 0.7, 0.1, 0.3]},
#Reward
"reward" :"pnpswipe",
"distance_type" :"euclidean",
"vae_path" :null,
"yolact_path" :null,
"yolact_config" :null,
#Train
"train_framework" :"tensorflow",
"algo" :"multi",
"num_networks" :3,
"max_episode_steps" :1024,
"algo_steps" :1024,
"steps" :15000000,
"pretrained_model" :null,
"multiprocessing" :false,
#Evaluation
"eval_freq" :1000000,
"eval_episodes" :50,
#Saving and logging
"logdir" :"trained_models/debug",
"record" :0
}
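This file is nearly a copy of pnr_medium_multi2.json above; only a few top-level keys differ (robot, task_type, task_objects, reward, logdir). When maintaining several near-duplicate configs like these, a quick diff of the parsed dictionaries is handy — the sketch below reuses the `load_commented_json` helper from earlier and is an illustration, not a repository utility:

```python
def diff_configs(path_a, path_b):
    """Print top-level keys whose values differ between two configs."""
    a, b = load_commented_json(path_a), load_commented_json(path_b)
    for key in sorted(set(a) | set(b)):
        if a.get(key) != b.get(key):
            print(f"{key}: {a.get(key)!r} -> {b.get(key)!r}")

# diff_configs("myGym/configs/pnr_medium_multi2.json",
#              "myGym/configs/train_swipe_multi2.json")
# Expected differences: robot, task_type, task_objects, reward, logdir.
```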
40 changes: 20 additions & 20 deletions myGym/envs/rewards.py
@@ -1287,22 +1287,22 @@ def gripper_reached_object(self, gripper, object):

def find_compute(self, gripper, object):
# initial reach
self.env.p.addUserDebugText("find object", [0.7,0.7,0.7], lifeTime=0.1, textColorRGB=[125,0,0])
self.env.p.addUserDebugText("Subgoal: Pick", [.65, 1., 0.5], lifeTime=0.1, textColorRGB=[125,0,0])
dist = self.task.calc_distance(gripper, object)
self.env.p.addUserDebugLine(gripper[:3], object[:3], lifeTime=0.1)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.5,0.5,0.7], lifeTime=0.1, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.1, textColorRGB=[0, 125, 0])
if self.last_find_dist is None:
self.last_find_dist = dist
if self.last_owner != 0:
self.last_find_dist = dist
reward = self.last_find_dist - dist
self.env.p.addUserDebugText(f"Reward:{reward}", [0.7,0.7,1.0], lifeTime=0.1, textColorRGB=[0,125,0])
self.env.p.addUserDebugText(f"Reward:{reward}", [0.65,1,0.65], lifeTime=0.1, textColorRGB=[0,125,0])
self.last_find_dist = dist
if self.task.check_object_moved(self.env.task_objects["actual_state"], threshold=1.2):
self.env.episode_over = True
self.env.episode_failed = True
self.network_rewards[0] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125])
return reward

def move_compute(self, object, goal):
@@ -1329,7 +1329,7 @@ def move_compute(self, object, goal):
self.env.episode_over = True
ix = 1 if self.num_networks > 1 else 0
self.network_rewards[ix] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125])
return reward

class ThreeStagePnP(TwoStagePnP):
@@ -1382,29 +1382,29 @@ def decide(self, observation=None):

def move_compute(self, object, goal):
# moving object above goal position (forced 2D reach)
self.env.p.addUserDebugText("move", [0.7,0.7,0.7], lifeTime=0.1, textColorRGB=[0,0,125])
self.env.p.addUserDebugText("Subgoal:move", [.65, 1., 0.5], lifeTime=0.1, textColorRGB=[0,0,125])
object_XY = object[:3]
goal_XY = [goal[0], goal[1], goal[2]+0.2]
self.env.p.addUserDebugLine(object_XY, goal_XY, lifeTime=0.1)
dist = self.task.calc_distance(object_XY, goal_XY)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.5,0.5,0.7], lifeTime=0.1, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.1, textColorRGB=[0, 125, 0])
if self.last_move_dist is None or self.last_owner != 1:
self.last_move_dist = dist
reward = self.last_move_dist - dist
self.env.p.addUserDebugText(f"Reward:{reward}", [0.7,0.7,1.2], lifeTime=0.1, textColorRGB=[0,125,0])
self.last_move_dist = dist
ix = 1 if self.num_networks > 1 else 0
self.network_rewards[ix] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125])
return reward


def place_compute(self, object, goal):
# reach of goal position + task object height in Z axis and release
self.env.p.addUserDebugText("place", [0.7,0.7,0.7], lifeTime=0.1, textColorRGB=[125,125,0])
self.env.p.addUserDebugText("Subgoal: place", [.65, 1., 0.5], lifeTime=0.1, textColorRGB=[125,125,0])
self.env.p.addUserDebugLine(object[:3], goal[:3], lifeTime=0.1)
dist = self.task.calc_distance(object, goal)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.5,0.5,0.7], lifeTime=0.1, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.1, textColorRGB=[0, 125, 0])
if self.last_place_dist is None or self.last_owner != 2:
self.last_place_dist = dist
reward = self.last_place_dist - dist
@@ -1422,7 +1422,7 @@ def place_compute(self, object, goal):
self.env.episode_info = "Task finished in initial configuration"
self.env.episode_over = True
self.network_rewards[-1] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.7,0.7,1.1], lifeTime=0.1, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.1, textColorRGB=[0,0,125])
return reward

def gripper_reached_object(self, gripper, object):
@@ -1510,7 +1510,7 @@ def find_compute(self, gripper, object):
self.env.p.addUserDebugText("find object", [0.63,1,0.5], lifeTime=0.5, textColorRGB=[125,0,0])
dist = self.task.calc_distance(gripper[:3], object[:3])
self.env.p.addUserDebugLine(gripper[:3], object[:3], lifeTime=0.1)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
if self.last_find_dist is None:
self.last_find_dist = dist
if self.last_owner != 0:
@@ -1522,7 +1522,7 @@ def move_compute(self, object, goal):
# self.env.episode_over = True
# self.env.episode_failed = True
self.network_rewards[0] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[0]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
return reward

def move_compute(self, object, goal):
@@ -1532,15 +1532,15 @@ def move_compute(self, object, goal):
goal_XY = goal[:3]
self.env.p.addUserDebugLine(object_XY, goal_XY, lifeTime=0.1)
dist = self.task.calc_distance(object_XY, goal_XY)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
if self.last_move_dist is None or self.last_owner != 1:
self.last_move_dist = dist
reward = self.last_move_dist - dist
self.env.p.addUserDebugText(f"Reward:{reward}", [0.61,1,0.55], lifeTime=0.5, textColorRGB=[0,125,0])
self.last_move_dist = dist
ix = 1 if self.num_networks > 1 else 0
self.network_rewards[ix] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
return reward


@@ -1549,13 +1549,13 @@ def rotate_compute(self, object, goal):
self.env.p.addUserDebugText("rotate", [0.63,1,0.5], lifeTime=0.1, textColorRGB=[125,125,0])
self.env.p.addUserDebugLine(object[:3], goal[:3], lifeTime=0.1)
dist = self.task.calc_distance(object, goal)
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.59,1,0.65], lifeTime=0.5, textColorRGB=[0, 125, 0])
self.env.p.addUserDebugText("Distance: {}".format(round(dist,3)), [0.65,1,0.55], lifeTime=0.5, textColorRGB=[0, 125, 0])
if self.last_place_dist is None or self.last_owner != 2:
self.last_place_dist = dist
reward = self.last_place_dist - dist

rot = self.task.calc_rot_quat(object, goal)
self.env.p.addUserDebugText("Rotation: {}".format(round(rot,3)), [0.57,1,0.7], lifeTime=0.5, textColorRGB=[0, 222, 100])
self.env.p.addUserDebugText("Rotation: {}".format(round(rot,3)), [0.65,1,0.6], lifeTime=0.5, textColorRGB=[0, 222, 100])
if self.last_rot_dist is None or self.last_owner != 2:
self.last_rot_dist = rot
rewardrot = self.last_rot_dist - rot
@@ -1574,7 +1574,7 @@ def rotate_compute(self, object, goal):
# self.env.episode_info = "Task finished in initial configuration"
# self.env.episode_over = True
self.network_rewards[-1] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
return reward

def object_near_goal(self, object, goal):
@@ -1671,7 +1671,7 @@ def move_compute(self, object, pregoal):
self.last_place_dist = dist
ix = 1 if self.num_networks > 1 else 0
self.network_rewards[ix] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[ix]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
return reward


@@ -1710,7 +1710,7 @@ def rotate_compute(self, object, goal):
# self.env.episode_info = "Task finished in initial configuration"
# self.env.episode_over = True
self.network_rewards[2] += reward
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.59,1,0.6], lifeTime=0.5, textColorRGB=[0,0,125])
self.env.p.addUserDebugText(f"Rewards:{self.network_rewards[-1]}", [0.65,1,0.7], lifeTime=0.5, textColorRGB=[0,0,125])
return reward

def object_ready_swipe(self, object, goal):
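Most of the touched lines in rewards.py only move the `addUserDebugText` overlays to one shared column around [0.65, 1, …], so the subgoal label, distance, per-step reward, and accumulated reward stack neatly on screen; the reward logic itself is unchanged. That logic is the same shaping pattern in every `*_compute` method: reward each step by the decrease in distance since the previous step, reset the baseline whenever a different sub-network takes over, and add the result to that network's running total. A stripped-down sketch of the pattern (class and variable names are illustrative, not the actual myGym classes):

```python
class ProgressReward:
    """Distance-progress shaping as used in the compute methods above.

    Each step pays (previous distance - current distance); the baseline
    is reset whenever a different sub-network takes over, so one owner
    is not credited for progress made by another.
    """

    def __init__(self, num_networks=3):
        self.network_rewards = [0.0] * num_networks
        self.last_dist = None
        self.last_owner = None

    def step(self, owner, dist):
        if self.last_dist is None or self.last_owner != owner:
            self.last_dist = dist            # reset baseline on hand-over
        reward = self.last_dist - dist       # positive when moving closer
        self.last_dist = dist
        self.last_owner = owner
        self.network_rewards[owner] += reward
        return reward

# Example: network 0 closes the distance from 0.30 to 0.10.
shaper = ProgressReward()
for d in (0.30, 0.25, 0.18, 0.10):
    shaper.step(owner=0, dist=d)
print(shaper.network_rewards[0])  # ~0.20 total progress (first step pays 0)
```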
