-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
wangxiyao
committed
Jun 18, 2023
0 parents
commit b7ad76f
Showing
26 changed files
with
3,386 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
data | ||
exp | ||
results | ||
.idea | ||
*__pycache__* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
## Overview | ||
Code for the ICML 2023 paper "Live in the Moment: Learning Dynamics Model Adapted to Evolving Policy". | ||
|
||
## Usage | ||
|
||
MBPO | ||
> CUDA_VISIBLE_DEVICES=0 python main_pmac.py --env_name 'Humanoid-v2' --num_epoch 400 --exp_name humanoid_mbpo --seed 6 | ||
PDML-MBPO | ||
> CUDA_VISIBLE_DEVICES=0 python main_pmac_tvd.py --env_name 'Humanoid-v2' --num_epoch 400 --exp_name humanoid_pdml --seed 4 --reweight_model TV --reweight_rollout TV | ||
|
||
## Code structure | ||
During training, an 'exp' folder will be created alongside the 'PDML' folder. | ||
|
||
## Dependencies | ||
MuJoCo 1.5 & MuJoCo 2.0 | ||
|
||
## Reference | ||
This code is built on a PyTorch implementation of MBPO: https://github.com/Xingyu-Lin/mbpo_pytorch |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import numpy as np | ||
from gym import utils | ||
from gym.envs.mujoco import mujoco_env | ||
|
||
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """
    Ant locomotion environment with a reduced observation.

    The external contact forces (sim.data.cfrc_ext) are left out of the
    observation; they are still used for the contact cost in the reward.
    Everything else follows Ant-v2 from
    https://github.com/openai/gym/blob/master/gym/envs/mujoco/ant.py
    """

    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
        utils.EzPickle.__init__(self)

    def step(self, a):
        """
        Apply action `a` and return (observation, reward, done, info).

        The reward is forward velocity of the torso minus control and
        contact costs, plus a constant survival bonus. The info dict
        carries each signed reward term separately.
        """
        x_start = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        x_end = self.get_body_com("torso")[0]

        forward_reward = (x_end - x_start) / self.dt
        ctrl_cost = .5 * np.square(a).sum()
        clipped_contact = np.clip(self.sim.data.cfrc_ext, -1, 1)
        contact_cost = 0.5 * 1e-3 * np.sum(np.square(clipped_contact))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward

        # The episode continues only while the state is finite and
        # state[2] stays within [0.2, 1.0].
        state = self.state_vector()
        healthy = np.isfinite(state).all() and 0.2 <= state[2] <= 1.0
        done = not healthy

        ob = self._get_obs()
        info = {
            "reward_forward": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "reward_contact": -contact_cost,
            "reward_survive": survive_reward,
        }
        return ob, reward, done, info

    def _get_obs(self):
        """Return qpos (without its first two entries) followed by qvel."""
        sim_data = self.sim.data
        # Clipped cfrc_ext is intentionally not appended here.
        return np.concatenate((sim_data.qpos.flat[2:], sim_data.qvel.flat))

    def reset_model(self):
        """Reset to the initial state plus random noise and return the observation."""
        position_noise = self.np_random.uniform(
            low=-.1, high=.1, size=self.model.nq)
        qpos = self.init_qpos + position_noise
        velocity_noise = self.np_random.randn(self.model.nv) * .1
        qvel = self.init_qvel + velocity_noise
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Set the camera distance to half of the model's extent."""
        extent = self.model.stat.extent
        self.viewer.cam.distance = extent * 0.5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import numpy as np | ||
from gym.envs.mujoco import mujoco_env | ||
from gym import utils | ||
|
||
def mass_center(model, sim):
    """
    Return the first component of the mass-weighted mean body position.

    Each row of `sim.data.xipos` is weighted by the matching entry of
    `model.body_mass`; the weighted rows are summed and divided by the
    total mass. Only element 0 of the resulting vector is returned
    (presumably the x-coordinate -- confirm against the simulator docs).
    """
    body_masses = np.expand_dims(model.body_mass, axis=1)
    weighted_sum = (body_masses * sim.data.xipos).sum(axis=0)
    center = weighted_sum / body_masses.sum()
    return center[0]
|
||
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """
    Humanoid locomotion environment with a reduced observation.

    COM inertia (cinert), COM velocity (cvel), actuator forces
    (qfrc_actuator) and external forces (cfrc_ext) are left out of the
    observation. Everything else follows Humanoid-v2 from
    https://github.com/openai/gym/blob/master/gym/envs/mujoco/humanoid.py
    """

    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        """Return qpos (without its first two entries) followed by qvel."""
        sim_data = self.sim.data
        # cinert, cvel, qfrc_actuator and cfrc_ext are intentionally omitted.
        observation_parts = (sim_data.qpos.flat[2:], sim_data.qvel.flat)
        return np.concatenate(observation_parts)

    def step(self, a):
        """
        Apply action `a` and return (observation, reward, done, info).

        The reward is a scaled velocity term from the change in
        `mass_center`, minus control and (capped) impact costs, plus a
        constant alive bonus. The info dict carries each signed term.
        """
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)

        sim_data = self.sim.data
        alive_bonus = 5.0
        displacement = pos_after - pos_before
        lin_vel_cost = 1.25 * displacement / self.dt
        quad_ctrl_cost = 0.1 * np.square(sim_data.ctrl).sum()
        # The impact cost is capped at 10.
        quad_impact_cost = min(.5e-6 * np.square(sim_data.cfrc_ext).sum(), 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus

        # The episode terminates once qpos[2] leaves the [1.0, 2.0] band.
        qpos = self.sim.data.qpos
        too_low = qpos[2] < 1.0
        done = bool(too_low or (qpos[2] > 2.0))

        info = {
            "reward_linvel": lin_vel_cost,
            "reward_quadctrl": -quad_ctrl_cost,
            "reward_alive": alive_bonus,
            "reward_impact": -quad_impact_cost,
        }
        return self._get_obs(), reward, done, info

    def reset_model(self):
        """Reset to the initial state plus small uniform noise and return the observation."""
        noise_scale = 0.01
        qpos = self.init_qpos + self.np_random.uniform(
            low=-noise_scale, high=noise_scale, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(
            low=-noise_scale, high=noise_scale, size=self.model.nv)
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Configure the viewer camera (tracked body, distance, look-at height, elevation)."""
        camera = self.viewer.cam
        camera.trackbodyid = 1
        camera.distance = self.model.stat.extent * 1.0
        camera.lookat[2] = 2.0
        camera.elevation = -20
Oops, something went wrong.