forked from swordest/mec_drl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
69 lines (54 loc) · 2.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from mec_env import *
from helper import *
import tensorflow as tf
import ipdb as pdb
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
MAX_EPISODE = 500        # number of episodes run by the training loop
MAX_EPISODE_LEN = 1000   # step cap per episode; also handed to MecSvrEnv
NUM_T = 1                # not referenced anywhere else in this script
NUM_R = 1                # handed to MecSvrEnv (presumably receiver count -- TODO confirm)
SIGMA2 = 0.3e-9          # handed to MecSvrEnv (presumably noise power -- TODO confirm)

# Settings shared by every user; merged into each user_config entry below.
config = {'state_dim': 3, 'action_dim': 2}

# Training settings passed unchanged to every MecTerm instance.
train_config = {
    'minibatch_size': 64,
    'actor_lr': 0.0001,
    'tau': 0.001,
    'critic_lr': 0.001,
    'gamma': 0.99,
    'buffer_size': 1000000,
    'random_seed': 1234,
}

# One dict per simulated user.
user_config = [
    {
        'id': '1',
        'rate': 3,
        'dis': 100,
        'action_bound': 1,
        'data_buf_size': 100,
        't_factor': 1.0,
        'penalty': -2000,
    },
]

# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(user_config)
# 0. initialize the session object
sess = tf.Session()

# 1. include all user in the system according to the user_config
user_list = []
for info in user_config:
    # Merge the shared settings into this user's dict; this mutates the
    # user_config entry in place before it is handed to MecTerm.
    info.update(config)
    user_list.append(MecTerm(sess, info, train_config))
    # Debug marker. NOTE(review): assumed to be emitted once per user --
    # confirm it was not meant to run once after the loop.
    print('test')
# Reward history; filled by the training loop and plotted at the end.
r_rec = []

# 2. build the simulation environment from the users created above
env = MecSvrEnv(user_list, NUM_R, SIGMA2, MAX_EPISODE_LEN)

# Run the TensorFlow global-variable initializer first, then call the
# environment's init_target_network() (same order as before).
sess.run(
    tf.global_variables_initializer()
)
env.init_target_network()
# 3. start to explore for each episode
for i in range(MAX_EPISODE):
    # start from a random state
    # (the first element of the value returned by reset() is logged as "ibuf")
    cur_init_ds_ep = env.reset()

    # Per-episode running sums, one slot per user.
    cur_r_ep = np.zeros(len(user_list))
    cur_p_ep = np.zeros(len(user_list))
    cur_ts_ep = np.zeros(len(user_list))
    cur_ps_ep = np.zeros(len(user_list))
    cur_rs_ep = np.zeros(len(user_list))
    cur_ds_ep = np.zeros(len(user_list))
    cur_ch_ep = np.zeros(len(user_list))

    for j in range(MAX_EPISODE_LEN):
        # first try to transmit from current state
        # (the fourth returned value is not used by this script)
        [cur_r, done, cur_p, _, cur_ts, cur_ps, cur_rs, cur_ds, cur_ch] = env.step_transmit()

        cur_r_ep += cur_r
        cur_p_ep += cur_p
        cur_ts_ep += cur_ts
        cur_ps_ep += cur_ps
        cur_rs_ep += cur_rs
        cur_ds_ep += cur_ds
        cur_ch_ep += cur_ch

        if done:
            # Average over the steps actually taken; equals MAX_EPISODE_LEN
            # when the episode ends on its final step.
            steps = j + 1
            # NOTE(review): this stores the reward of the last step only,
            # while the line below reports the episode mean -- confirm which
            # one the plot is supposed to show.
            r_rec.append(cur_r)
            print('%d:r:%.4f,p:%.4f,tr:%.4f,pr:%.4f,rev:%.4f,dbuf:%.4f,ch:%.8f,ibuf:%d' % (
                i,
                cur_r_ep / steps,
                cur_p_ep / steps,
                cur_ts_ep / steps,
                cur_ps_ep / steps,
                cur_rs_ep / steps,
                cur_ds_ep / steps,
                cur_ch_ep / steps,
                cur_init_ds_ep[0]))
            # Stop stepping a finished episode so it is recorded exactly once
            # and the environment is reset before the next transmit.
            break

# Size the x axis from the records actually collected so both sequences always
# have the same length (this is range(MAX_EPISODE) when every episode
# finished).
plot_helper(range(len(r_rec)), r_rec)