# main_training_ppo_tsptw.py
# Forked from qcappart/hybrid-cp-rl-solver.
import sys
import os
import argparse
from src.problem.tsptw.learning.trainer_ppo import TrainerPPO
# Set KMP_DUPLICATE_LIB_OK for this process at import time.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort raised by Intel's OpenMP library -- confirm it is still needed.
# NOTE(review): this runs after the TrainerPPO import above; if that import
# already initialises the OpenMP runtime, the variable may be set too late
# to have any effect -- verify the ordering.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
def parse_arguments(argv=None):
    """Define and parse the command-line options of the TSPTW PPO training script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour the script entry point relies on. Passing an
            explicit list makes the function usable from other code and
            from tests.

    Returns:
        argparse.Namespace: one attribute per option declared below, holding
        either the parsed command-line value or the option's default.
    """
    parser = argparse.ArgumentParser()

    # Instances parameters
    parser.add_argument('--n_city', type=int, default=20)
    parser.add_argument('--grid_size', type=int, default=100)
    parser.add_argument('--max_tw_gap', type=int, default=10)
    parser.add_argument('--max_tw_size', type=int, default=100)
    parser.add_argument('--seed', type=int, default=1)

    # Hyper parameters
    parser.add_argument('--n_episode', type=int, default=1000000)
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--update_timestep', type=int, default=2000)
    parser.add_argument('--eps_clip', type=float, default=0.2)
    parser.add_argument('--entropy_value', type=float, default=0.01)
    parser.add_argument('--k_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--latent_dim', type=int, default=64)
    parser.add_argument('--hidden_layer', type=int, default=3)

    # Argument for Trainer
    parser.add_argument('--save_dir', type=str, default='./result-default')
    parser.add_argument('--plot_training', type=int, default=1)
    parser.add_argument('--mode', default='cpu', help='cpu/gpu')

    # argv=None makes argparse read sys.argv[1:], preserving the original
    # zero-argument call.
    return parser.parse_args(argv)
# Script entry point: parse the options, echo the configuration to stdout,
# then hand the parsed namespace to the PPO trainer.
if __name__ == '__main__':
    args = parse_arguments()

    # Banner describing the random TSPTW instances used for training.
    print("***********************************************************")
    print("[INFO] TRAINING ON RANDOM INSTANCES: TSPTW")
    print("[INFO] n_city: %d" % args.n_city)
    print("[INFO] grid_size: %d" % args.grid_size)
    print("[INFO] max_tw_gap: %d" % args.max_tw_gap)
    print("[INFO] max_tw_size: %d" % args.max_tw_size)
    print("[INFO] seed: %s" % args.seed)

    # Banner describing the PPO hyper-parameters.
    print("***********************************************************")
    print("[INFO] TRAINING PARAMETERS")
    print("[INFO] Algorithm: PPO")
    print("[INFO] learning rate: %f" % args.learning_rate)
    print("[INFO] eps_clip: %f" % args.eps_clip)
    print("[INFO] entropy_value: %f" % args.entropy_value)
    print("[INFO] hidden_layer: %d" % args.hidden_layer)
    print("[INFO] k_epochs: %d" % args.k_epochs)
    print("[INFO] batch_size: %d" % args.batch_size)
    print("[INFO] update_timestep: %d" % args.update_timestep)
    print("[INFO] latent_dim: %d" % args.latent_dim)
    print("***********************************************************")

    # Flush so the banner is written out before training starts.
    sys.stdout.flush()

    trainer = TrainerPPO(args)
    trainer.run_training()