test.py
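"""Training entry point (a sketch of intent, inferred from how this script is written).

Trains an agent on a Gym control task, logging aggregate test statistics
every 10 episodes and, for UVFAWithRewardAgent, planner rollout statistics
every 20 episodes. The imported agent and logging classes (UVFAgent,
StandardRLAgent, UVFAWithRewardAgent, Stats, StatLogger) are project-local
modules; their interfaces are assumed from the calls made below.
"""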
import gym
import numpy as np
import torch

from uvfa import UVFAgent
from rl import StandardRLAgent
from uvfa_r import UVFAWithRewardAgent
from stats import Stats
from stat_logger import StatLogger

device = torch.device('cuda')
# device = torch.device('cpu')


def main():
    # Pick the environment to train on.
    # env = gym.make('Pendulum-v0')
    # env = gym.make('Acrobot-v1')
    env = gym.make('CartPole-v1')
    # env = gym.make('MountainCar-v0')
    # env = gym.make('MountainCarContinuous-v0')
    # env = gym.make('HalfCheetah-v3')
    # env = gym.make('InvertedPendulum-v2')
    # env = gym.make('Ant-v3')

    use_td3 = True
    td3_str = '' if use_td3 else '_notd3'
    # agent, agent_type = StandardRLAgent(env, device=device), 'rl'
    agent, agent_type = UVFAWithRewardAgent(env, use_td3=use_td3, device=device), 'uvfa_r' + td3_str

    run_name = env.spec.id + '_' + agent_type
    logger = StatLogger(run_name=run_name, aggregate_steps=2000)
    print(env.observation_space, env.action_space)

    num_episode = 200000
    # num_episode = 0
    max_steps = 100000  # hard cap on total environment steps
    total_steps = 0
    for ep in range(num_episode):
        if total_steps > max_steps:
            break
        print('Episode', ep, 'Total Step', total_steps)
        steps = agent.run_episode()
        total_steps += steps

        # Evaluate the current policy every 10 training episodes.
        if ep % 10 == 0:
            print('==Test==')
            tep = 10  # number of evaluation episodes
            stats = Stats()
            for i in range(tep):
                info = agent.test_episode()
                stats.update(info)
            print(stats)
            logger.add_data(total_steps, stats)

        # Every 20 episodes, refit the planner and evaluate planned rollouts.
        if ep % 20 == 0 and isinstance(agent, UVFAWithRewardAgent):
            if agent.update_planner():
                tep = 10
                stats = Stats()
                for i in range(tep):
                    # show_plan = (i == 0)  # enable to visualize the first rollout's plan
                    show_plan = False
                    info = agent.plan_episode(show_plan=show_plan)
                    stats.update(info)
                print(stats)
                logger.add_data(total_steps, stats)

    # print(s)
    # Control features linearly as subgoal
    # print(env.s)
    # returns = []
    # min_dis = []
    # for ep in range(20):
    #     # print(ep)
    #     r, m = agent.test_episode()
    #     returns.append(r)
    #     min_dis.append(m)
    # print('Avg return =', np.mean(returns))
    # print('Min dis =', np.mean(min_dis))


if __name__ == '__main__':
    main()