# penenv3.py
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path


class PenEnv(gym.Env):
    """Pendulum swing-up environment with a meta-learning task interface.

    The task is the pendulum mass; see sample_tasks / set_task / get_task.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self, env_config={}):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.g = env_config.get("g", 10)
        self.m = 1.
        self.l = 1.
        self.viewer = None

        # Observation is (cos(theta), sin(theta), theta_dot).
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque,
            high=self.max_torque,
            shape=(1,),
            dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=-high,
            high=high,
            dtype=np.float32
        )

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        # Quadratic cost on angle error, angular velocity, and applied torque.
        costs = angle_normalize(th) ** 2 + .1 * thdot ** 2 + .001 * (u ** 2)

        # Semi-implicit Euler integration of the pendulum dynamics.
        newthdot = thdot + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt
        newth = th + newthdot * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot])

    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, .2)
            rod.set_color(.8, .3, .3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1., 1.)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        # One-time geoms are cleared every frame, so re-add the torque arrow here.
        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u:
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None

    def sample_tasks(self, n_tasks):
        # Each task is a pendulum mass drawn uniformly from [0.5, 2.0].
        return np.random.uniform(low=0.5, high=2.0, size=(n_tasks,))

    def set_task(self, task):
        """
        Args:
            task: task of the meta-learning environment (the pendulum mass)
        """
        self.m = task

    def get_task(self):
        """
        Returns:
            task: task of the meta-learning environment (the pendulum mass)
        """
        return self.m

def angle_normalize(x):
    # Wrap an angle to the interval [-pi, pi), e.g. angle_normalize(3 * np.pi) == -np.pi.
    return ((x + np.pi) % (2 * np.pi)) - np.pi
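
# A minimal smoke test, assuming this file is run directly and that the legacy
# gym API used above (reset() returning an observation, step() returning a
# 4-tuple) is installed. Rendering is skipped, so no display or
# assets/clockwise.png is needed. The loop below is illustrative, not part of
# the environment's API: it samples a few task masses, rolls out a short
# random-torque episode for each, and prints the episode return.
if __name__ == "__main__":
    env = PenEnv()
    env.seed(0)

    for task in env.sample_tasks(3):
        env.set_task(task)  # set the pendulum mass for this rollout
        obs = env.reset()
        total_reward = 0.0
        for _ in range(50):
            action = env.action_space.sample()  # random torque in [-2, 2]
            obs, reward, done, info = env.step(action)
            total_reward += reward
        print("mass=%.2f  return over 50 steps=%.2f" % (env.get_task(), total_reward))

    env.close()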