Skip to content

Commit 52ed16c

Browse files
committed
Chapter 14: Robot Learning
1 parent cf26dcb commit 52ed16c

10 files changed

+1202
-0
lines changed

Chapter14/alp/alp_gmm.py

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
"""
2+
Copied from
3+
https://github.com/flowersteam/teachDeepRL/blob/master/teachDRL/teachers/algos/alp_gmm.py
4+
@misc{portelas2019teacher,
5+
title={Teacher algorithms for curriculum learning of Deep RL in continuously parameterized environments},
6+
author={Rémy Portelas and Cédric Colas and Katja Hofmann and Pierre-Yves Oudeyer},
7+
year={2019},
8+
eprint={1910.07224},
9+
archivePrefix={arXiv},
10+
primaryClass={cs.LG}
11+
}
12+
"""
13+
14+
from sklearn.mixture import GaussianMixture as GMM
15+
import numpy as np
16+
from gym.spaces import Box
17+
from alp.dataset import BufferedDataset
18+
19+
20+
def proportional_choice(v, eps=0.0):
    """Pick an index of *v* with probability proportional to its value.

    Falls back to a uniform draw when the values sum to zero, and also
    with probability *eps* (epsilon-greedy exploration).
    """
    weights = np.asarray(v, dtype=float)
    total = weights.sum()
    # Degenerate weights or an epsilon coin flip -> uniform random index.
    if total == 0 or np.random.rand() < eps:
        return np.random.randint(np.size(v))
    # A single multinomial draw yields a one-hot vector; return its hot index.
    one_hot = np.random.multinomial(1, weights / total)
    return np.where(one_hot == 1)[0][0]
26+
27+
28+
# Absolute Learning Progress (ALP) computer object
# It uses a buffered kd-tree to efficiently implement a k-nearest-neighbor algorithm
class EmpiricalALPComputer:
    """Estimate absolute learning progress (ALP) for sampled tasks.

    (reward, task) pairs are stored in a buffered k-nearest-neighbor
    dataset; the ALP of a new episode is the absolute reward difference
    w.r.t. the closest previously seen task.
    """

    def __init__(self, task_size, max_size=None, buffer_size=500):
        # x = reward (1-dim), y = task parameter vector (task_size-dim).
        self.alp_knn = BufferedDataset(
            1, task_size, buffer_size=buffer_size, lateness=0, max_size=max_size
        )

    def compute_alp(self, task, reward):
        """Return the ALP of (task, reward) and record the pair."""
        alp = 0
        # A handful of stored episodes are required before nearest-neighbor
        # queries become meaningful.
        if len(self.alp_knn) > 5:
            # Closest previously sampled task...
            _, neighbor_idx = self.alp_knn.nn_y(task)
            # ...and the reward obtained on it.
            previous_reward = self.alp_knn.get_x(neighbor_idx[0])
            # ALP = |reward difference| between the two attempts.
            alp = np.abs(reward - previous_reward)
        # Store the new pair for future queries.
        self.alp_knn.add_xy(reward, task)
        return alp
54+
55+
56+
# Absolute Learning Progress - Gaussian Mixture Model
# mins / maxs are vectors defining task space boundaries (ex: mins=[0,0,0] maxs=[1,1,1])
class ALPGMM:
    """ALP-GMM curriculum teacher.

    Periodically fits a batch of Gaussian mixture models on recent
    (task, ALP) tuples, keeps the best-fitting one, and samples new tasks
    from the Gaussian with the highest mean ALP.

    Parameters
    ----------
    mins, maxs : sequences of floats
        Task-space boundaries (e.g. mins=[0, 0, 0], maxs=[1, 1, 1]).
    seed : int or None
        RNG seed; a random seed is drawn when None.
    params : dict or None
        Optional hyper-parameter overrides (see the `params.get` calls
        below for the recognized keys and their defaults).
    """

    def __init__(self, mins, maxs, seed=None, params=None):
        # BUGFIX: `params=dict()` was a mutable default argument.
        if params is None:
            params = {}

        # BUGFIX: the original `if not seed:` discarded an explicit seed of 0.
        self.seed = np.random.randint(42, 424242) if seed is None else seed
        np.random.seed(self.seed)

        # Task space boundaries
        self.mins = np.array(mins)
        self.maxs = np.array(maxs)

        # Range of number of Gaussians to try when fitting the GMM
        self.potential_ks = params.get("potential_ks", np.arange(2, 11, 1))
        # Restart each new fit by initializing with the last fit
        self.warm_start = params.get("warm_start", False)
        # Fitness criterion used to select the best GMM among the candidates
        self.gmm_fitness_fun = params.get("gmm_fitness_fun", "aic")
        # Number of Expectation-Maximization trials per fit
        self.nb_em_init = params.get("nb_em_init", 1)
        # Number of episodes between two fits of the GMM
        self.fit_rate = params.get("fit_rate", 250)
        self.nb_random = self.fit_rate  # Number of bootstrapping episodes

        # Ratio of randomly sampled tasks VS tasks sampled using the GMM
        self.random_task_ratio = params.get("random_task_ratio", 0.2)
        self.random_task_generator = Box(self.mins, self.maxs, dtype=np.float32)

        # Maximal number of episodes to account for when computing ALP
        alp_max_size = params.get("alp_max_size", None)
        alp_buffer_size = params.get("alp_buffer_size", 500)

        # Init ALP computer
        self.alp_computer = EmpiricalALPComputer(
            len(mins), max_size=alp_max_size, buffer_size=alp_buffer_size
        )

        self.tasks = []
        self.alps = []
        # Task vectors with their ALP appended as an extra last dimension.
        self.tasks_alps = []

        # Init GMMs
        self.potential_gmms = [self.init_gmm(k) for k in self.potential_ks]

        # Book-keeping for later analysis / dumping
        self.bk = {
            "weights": [],
            "covariances": [],
            "means": [],
            "tasks_alps": [],
            "episodes": [],
        }

    def init_gmm(self, nb_gaussians):
        """Return an unfitted GMM with `nb_gaussians` components."""
        return GMM(
            n_components=nb_gaussians,
            covariance_type="full",
            random_state=self.seed,
            warm_start=self.warm_start,
            n_init=self.nb_em_init,
        )

    def get_nb_gmm_params(self, gmm):
        """Number of free parameters of `gmm` (assumes full covariance).

        See https://stats.stackexchange.com/questions/229293/the-number-of-parameters-in-gaussian-mixture-model
        """
        nb_gmms = gmm.get_params()["n_components"]
        d = len(self.mins)
        params_per_gmm = (d * d - d) / 2 + 2 * d + 1
        return nb_gmms * params_per_gmm - 1

    def update(self, task, reward):
        """Record one episode's (task, reward); refit GMMs every `fit_rate` episodes.

        Raises NotImplementedError if `gmm_fitness_fun` is not one of
        "bic", "aic", "aicc".
        """
        self.tasks.append(task)

        # Compute corresponding ALP
        self.alps.append(self.alp_computer.compute_alp(task, reward))

        # Concatenate task vector with ALP dimension
        # NOTE(review): assumes `task` is a numpy array (uses .tolist()).
        self.tasks_alps.append(np.array(task.tolist() + [self.alps[-1]]))

        if len(self.tasks) >= self.nb_random:  # If initial bootstrapping is done
            if (len(self.tasks) % self.fit_rate) == 0:  # Time to fit
                # 1 - Retrieve last <fit_rate> (task, alp) tuples
                cur_tasks_alps = np.array(self.tasks_alps[-self.fit_rate :])

                # 2 - Fit batch of GMMs with varying number of Gaussians
                self.potential_gmms = [
                    g.fit(cur_tasks_alps) for g in self.potential_gmms
                ]

                # 3 - Compute fitness and keep best GMM
                if self.gmm_fitness_fun == "bic":  # Bayesian Information Criterion
                    fitnesses = [m.bic(cur_tasks_alps) for m in self.potential_gmms]
                elif self.gmm_fitness_fun == "aic":  # Akaike Information Criterion
                    fitnesses = [m.aic(cur_tasks_alps) for m in self.potential_gmms]
                elif self.gmm_fitness_fun == "aicc":  # AIC corrected for sample size
                    n = self.fit_rate
                    fitnesses = []
                    for m in self.potential_gmms:
                        k = self.get_nb_gmm_params(m)
                        penalty = (2 * k * (k + 1)) / (n - k - 1)
                        fitnesses.append(m.aic(cur_tasks_alps) + penalty)
                else:
                    # BUGFIX: removed unreachable `exit(1)` after the raise
                    # and named the offending value in the message.
                    raise NotImplementedError(
                        "Unknown gmm_fitness_fun: %r" % self.gmm_fitness_fun
                    )
                self.gmm = self.potential_gmms[np.argmin(fitnesses)]

                # book-keeping
                self.bk["weights"].append(self.gmm.weights_.copy())
                self.bk["covariances"].append(self.gmm.covariances_.copy())
                self.bk["means"].append(self.gmm.means_.copy())
                self.bk["tasks_alps"] = self.tasks_alps
                self.bk["episodes"].append(len(self.tasks))

    def sample_task(self):
        """Return a new task: random during bootstrap/exploration, else GMM-based."""
        if (len(self.tasks) < self.nb_random) or (
            np.random.random() < self.random_task_ratio
        ):
            # Random task sampling
            new_task = self.random_task_generator.sample()
        else:
            # ALP-based task sampling

            # 1 - Mean ALP value of each Gaussian (ALP is the last dimension)
            self.alp_means = [mean[-1] for mean in self.gmm.means_]

            # 2 - Sample a Gaussian proportionally to its mean ALP
            idx = proportional_choice(self.alp_means, eps=0.0)

            # 3 - Sample task in Gaussian, without forgetting to remove ALP dimension
            new_task = np.random.multivariate_normal(
                self.gmm.means_[idx], self.gmm.covariances_[idx]
            )[:-1]
            new_task = np.clip(new_task, self.mins, self.maxs).astype(np.float32)

        return new_task

    def dump(self, dump_dict):
        """Merge the book-keeping dict into `dump_dict` and return it."""
        dump_dict.update(self.bk)
        return dump_dict

0 commit comments

Comments
 (0)