-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeep_q_network_gym.py
74 lines (65 loc) · 2.61 KB
/
deep_q_network_gym.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import tensorflow as tf
import sys
import envs.gym_fun as game
from experiment import Experiment
from dqn_net import DqnNet
# --- Environment --------------------------------------------------------
# Game under training; swap the pair below to 'pong' / 3 to train on Pong.
GAME = 'breakout'  # the name of the game being played for log files
ACTIONS = 4        # number of valid actions in this game's action space
# GAME = 'pong' # the name of the game being played for log files
# ACTIONS = 3 # number of valid actions

# --- DQN hyper-parameters -----------------------------------------------
GAMMA = 0.99               # discount rate applied to past observations
OBSERVE = 50000            # timesteps of pure observation before training
EXPLORE = 1000000          # frames over which epsilon is annealed
FINAL_EPSILON = 0.1        # epsilon value after annealing completes
INITIAL_EPSILON = 1.0      # epsilon value at the start of training
REPLAY_MEMORY = 1000000    # number of previous transitions to remember
RESIZED_WIDTH = 84         # preprocessed frame width fed to the network
RESIZED_HEIGHT = 84        # preprocessed frame height fed to the network
BATCH = 32                 # minibatch size per gradient step
PHI_LENGTH = 4             # frames stacked into one network input
UPDATE_FREQUENCY = 4       # env steps between parameter updates
SAVE_FREQ = 125000         # steps between checkpoint saves
EVAL_FREQ = 250000         # steps between evaluation runs
EVAL_MAX_STEPS = 125000    # step budget for a single evaluation
TRAIN_MAX_STEPS = 7125000  # total training step budget
C_FREQ = 10000             # steps between target-network copies (copy_interval)
SLOW = False               # presumably toggles slow target updates in DqnNet -- confirm there
TAU = 1.                   # presumably target-network mixing coefficient -- confirm in DqnNet

# --- Optimizer ----------------------------------------------------------
OPTIMIZER = 'Graves'  # Adam, RMS, Graves
if OPTIMIZER == 'Adam':
    LEARNING_RATE, DECAY, MOMENTUM, EPSILON = 0.00025, 0., 0., 0.001
else:
    # RMS / Graves share the same settings.
    LEARNING_RATE, DECAY, MOMENTUM, EPSILON = 0.00025, 0.95, 0., 0.01

# --- Bookkeeping --------------------------------------------------------
PATH = os.getcwd() + '/'
FOLDER = '{}_networks_{}'.format(GAME, OPTIMIZER.lower())
VERBOSE = False
NUM_THREADS = 16  # intra-op parallelism threads for the TF session
def main():
    """Build the DQN network and run the training experiment on one GPU.

    Uses the module-level hyper-parameters; creates a single TF session
    capped at ~1/3 of GPU memory, constructs DqnNet and Experiment, and
    runs training to TRAIN_MAX_STEPS.
    """
    #with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True, intra_op_parallelism_threads=NUM_THREADS)) as sess:
    # Cap TF at roughly a third of GPU memory so other processes can share the card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        allow_soft_placement=True,
        log_device_placement=True,
        intra_op_parallelism_threads=NUM_THREADS)
    with tf.Session(config=config) as sess:
        # CUDA_VISIBLE_DEVICES masks the devices TF can see and renumbers
        # them starting at 0, so the (single) visible GPU is always
        # '/gpu:0'. Deriving the device name from the env var's value
        # (e.g. "1") would request a nonexistent device after masking.
        with tf.device('/gpu:0'):
            net = DqnNet(
                sess, RESIZED_HEIGHT, RESIZED_WIDTH, PHI_LENGTH, ACTIONS, GAME, gamma=GAMMA, copy_interval=C_FREQ,
                optimizer=OPTIMIZER, learning_rate=LEARNING_RATE, epsilon=EPSILON, decay=DECAY, momentum=MOMENTUM,
                verbose=VERBOSE, path=PATH, folder=FOLDER, slow=SLOW, tau=TAU)
            experiment = Experiment(
                sess, net, game, RESIZED_HEIGHT, RESIZED_WIDTH, PHI_LENGTH, ACTIONS, BATCH,
                GAME, GAMMA, OBSERVE, EXPLORE, FINAL_EPSILON, INITIAL_EPSILON, REPLAY_MEMORY,
                UPDATE_FREQUENCY, SAVE_FREQ, EVAL_FREQ, EVAL_MAX_STEPS, C_FREQ,
                OPTIMIZER, LEARNING_RATE, EPSILON, DECAY, MOMENTUM, TAU,
                VERBOSE, PATH, FOLDER, SLOW, train_max_steps=TRAIN_MAX_STEPS)
            experiment.run()

if __name__ == "__main__":
    main()