-
Notifications
You must be signed in to change notification settings - Fork 4
/
foo.py
70 lines (57 loc) · 2.16 KB
/
foo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import numpy as np
def evaulate_policy(w, env, number_of_episodes=2):
    """Evaluate a linear threshold policy and return its average episode length.

    The policy picks action 1 when ``inner(w, s) > 0`` and action 0 otherwise.
    Each episode runs until the environment reports that it is finished, and
    the number of steps taken is recorded.

    Both environment calling conventions are accepted:

    * classic Gym: ``reset() -> obs`` and ``step(a) -> (obs, r, done, info)``
    * Gym >= 0.26 / Gymnasium: ``reset() -> (obs, info)`` and
      ``step(a) -> (obs, r, terminated, truncated, info)``

    Note: the (misspelled) function name is kept as-is so existing callers
    continue to work.

    :param w: weight vector of the policy ``inner(w, s) > 0``
    :type w: ndarray
    :param env: environment object exposing ``reset()`` and ``step(action)``
    :type env: environment object
    :param number_of_episodes: number of episodes the policy is run for
    :type number_of_episodes: int
    :return: sum(timesteps_i) / number_of_episodes (``nan`` when no episode
        is run, because the mean of an empty list is taken)
    :rtype: float
    """
    episode_lengths = []
    for _ in range(number_of_episodes):
        reset_out = env.reset()
        # Newer APIs return (observation, info_dict); older ones return the
        # observation alone. Only unwrap when the shape matches unambiguously.
        if (
            isinstance(reset_out, tuple)
            and len(reset_out) == 2
            and isinstance(reset_out[1], dict)
        ):
            state = reset_out[0]
        else:
            state = reset_out

        steps = 0
        done = False
        while not done:
            # Choose action
            action = 1 if np.inner(w, state) > 0 else 0

            # Take action
            step_out = env.step(action)
            if len(step_out) == 5:
                # New-style step: the episode ends on termination OR truncation.
                state, _, terminated, truncated, _ = step_out
                done = terminated or truncated
            else:
                state, _, done, _ = step_out

            steps += 1
        episode_lengths.append(steps)
    return np.mean(episode_lengths)
def estimator(w_list, noise_coef):
    """Estimate the mean vector and covariance matrix from collected weights.

    The sample covariance is inflated by ``noise_coef * I`` so that extra
    variance is added along every coordinate. The identity matrix is sized
    from the data, so weight vectors of any dimension are supported (the
    original use case is 4-dimensional weights).

    :param w_list: list of weights that are used to form the estimates
    :type w_list: list of tuples
    :param noise_coef: a ``noise_coef * identity`` matrix is added to the
        estimated covariance matrix to increase variance
    :type noise_coef: float
    :return: sample estimate of the mean vector ``(d,)`` and covariance
        matrix ``(d, d)``, where ``d`` is the length of each weight tuple
    :rtype: pair of ndarrays
    """
    samples = np.array(w_list)
    mu_hat = np.mean(samples, axis=0)

    # Size the added noise from the data instead of assuming 4 dimensions.
    dim = np.size(mu_hat)

    # np.cov expects one variable per row, hence the transpose.
    covmat_hat = np.cov(samples.T) + noise_coef * np.eye(dim)
    return mu_hat, covmat_hat
def simulator(n, mu, covmat):
    """Draw n samples from a multivariate normal distribution.

    :param n: number of samples to generate
    :type n: int
    :param mu: mean vector of the distribution
    :type mu: ndarray
    :param covmat: covariance matrix of the distribution
    :type covmat: ndarray
    :return: the drawn samples, one tuple per sample
    :rtype: list of tuples
    """
    draws = np.random.multivariate_normal(mu, covmat, n)
    # Each row of the (n, d) array is one sample; hand them back as tuples.
    samples = []
    for row in draws:
        samples.append(tuple(row))
    return samples