-
Notifications
You must be signed in to change notification settings - Fork 1
/
MbanditSimulator.m
47 lines (42 loc) · 1.3 KB
/
MbanditSimulator.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
classdef MbanditSimulator < handle
    % MbanditSimulator  Runs a collection of policies against a set of arms.
    %
    %   For every policy, run_simulation plays T_ rounds. In each round the
    %   policy selects arm indices, the selected arms are pulled, the policy
    %   is updated with the observed rewards, and the logger records both
    %   the rewards and a violation entry.
    properties
        arms     % array of arm objects; each is called via pull() and reset()
        policies % cell array of policy objects; each is called via
                 % selectNextArms() and updateState(indices, rewards)
        K        % stored as given; not read anywhere in this class
        L        % stored as given; not read anywhere in this class
        h        % forwarded unchanged to logger_.record_violation every round
    end
    methods
        function self = MbanditSimulator(arms_, policies_, K_, L_, h_)
            % Store the simulation components and parameters as given.
            self.arms = arms_;
            self.policies = policies_;
            self.K = K_;
            self.L = L_;
            self.h = h_;
        end

        function run_simulation(self, T_, logger_)
            % Play T_ rounds for every policy, in policy order, then reset
            % all arms once at the end.
            %
            %   T_      - number of rounds per policy
            %   logger_ - object providing record_reward and record_violation
            for p = 1:length(self.policies)
                for t = 1:T_
                    % mod(t,1) is always 0, so progress is printed every round.
                    if mod(t,1) == 0
                        fprintf('Policy %d at round %d ...\n', p, t)
                    end
                    self.run_single_round(logger_, p, t);
                end
            end
            % Reset the arms after all policies have run. Index explicitly:
            % "for x = array" walks columns, so it would hand over the whole
            % array at once if self.arms were stored as a column.
            for k = 1:numel(self.arms)
                self.arms(k).reset();
            end
        end

        function run_single_round(self, logger_, p_, t_)
            % Play one round of policy p_ at time step t_.
            %
            %   logger_ - object providing record_reward and record_violation
            %   p_      - index into self.policies
            %   t_      - round number, passed through to the logger
            l_indices = self.policies{p_}.selectNextArms();
            l_rewards = [];
            % Force a row so each selected index is visited one at a time,
            % regardless of whether the policy returned a row or a column.
            for idx = reshape(l_indices, 1, [])
                l_rewards = [l_rewards, self.arms(idx).pull()]; %#ok<AGROW>
            end
            % The policy and logger receive the indices exactly as selected.
            self.policies{p_}.updateState(l_indices, l_rewards);
            logger_.record_reward(p_, t_, l_indices, l_rewards);
            logger_.record_violation(p_, t_, self.h);
        end
    end
end