-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathmain.py
136 lines (112 loc) · 5.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
import argparse
import logging
import gym
import sys
import universe
from universe import pyprofile, wrappers
from GameSettingsEvent import GTASetting
from drivers.deepdrive.deep_driver import DeepDriver
from drivers.deepdrive_tf.deep_driver_tf import DeepDriverTF
# Module-level loggers: `logger` (root) is what main() adjusts via -v;
# `extra_logger` gives universe's records their own stdout handler so they
# are emitted regardless of how the root logger ends up configured.
logger = logging.getLogger()
extra_logger = logging.getLogger('universe')

# One shared formatter; the handler passes everything DEBUG and up.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
stdout_log_handler = logging.StreamHandler(sys.stdout)
stdout_log_handler.setLevel(logging.DEBUG)
stdout_log_handler.setFormatter(formatter)
extra_logger.addHandler(stdout_log_handler)
def _parse_args():
    """Define and parse the command-line interface for the driving agent."""
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # NOTE: store_false means the monitor is ON by default and passing -m
    # turns it OFF.  The original help text ("Whether to activate the
    # monitor") contradicted that; reworded to match actual behavior.
    parser.add_argument('-m', '--monitor', action='store_false', help='Disable the monitor (enabled by default).')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera', action='store_true', help='Customize the GTA camera')
    return parser.parse_args()


def _configure_logging(verbosity):
    """Set log levels from the -v count (0 -> INFO, 1+ -> DEBUG)."""
    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    logger.setLevel(logging.DEBUG if verbosity >= 1 else logging.INFO)


def _build_env(args):
    """Create, wrap, and configure the environment; return the wrapped env."""
    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by the environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus come pre-wrapped with an action space translator.
        # Everything else wants a SafeActionSpace wrapper to shield it
        # from a random agent clicking around everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)
    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go',
        vnc_kwargs={
            'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5,
        },
    )
    return env


def _make_driver(name):
    """Instantiate and set up the requested driver.

    Raises:
        ValueError: if `name` is not a known driver.
    """
    drivers = {'DeepDriver': DeepDriver, 'DeepDriverTF': DeepDriverTF}
    try:
        driver = drivers[name]()
    except KeyError:
        # ValueError is more precise than the bare Exception previously
        # raised; any `except Exception` caller still catches it.
        raise ValueError('That driver is not available')
    driver.setup()
    return driver


def main():
    """Run the GTA-V driving agent loop until max-steps, then clean up."""
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    args = _parse_args()
    _configure_logging(args.verbosity)
    env = _build_env(args)
    driver = _make_driver(args.driver)

    if args.monitor:
        # BUG FIX: the Monitor wrapper used to be created and immediately
        # discarded, so recording never took effect; rebind env so the
        # wrapper is actually in the chain.
        # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    render = not args.no_render
    observation_n = env.reset()
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        # Distance is only present once the rewarder has sent diagnostics;
        # missing keys are expected early on, hence the debug-level log.
        try:
            if info is not None:
                distance = info['n'][0]['distance_from_destination']
                logger.info('distance %s', distance)
        except KeyError as e:
            logger.debug('distance not available %s', str(e))

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            observation_n, reward_n, done_n, info = env.step(action_n)

        # Reset only on a "real" episode end, not an artificial one
        # injected by the environment plumbing.
        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            print('done_n', done_n, 'i', i)
            logger.info('end of episode')
            env.reset()

    # We're done! clean up
    env.close()
# Script entry point.  main() returns None, so sys.exit(None) yields a
# zero (success) exit status; a nonzero status would require main() to
# return an int or raise.
if __name__ == '__main__':
    sys.exit(main())