diff --git a/myGym/configs/debug.json b/myGym/configs/debug.json
index ca164a46..8e8e38ac 100644
--- a/myGym/configs/debug.json
+++ b/myGym/configs/debug.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/debug1.json b/myGym/configs/debug1.json
index f18afe26..671c0368 100644
--- a/myGym/configs/debug1.json
+++ b/myGym/configs/debug1.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/debugdist.json b/myGym/configs/debugdist.json
index 6676350a..1c2e3502 100644
--- a/myGym/configs/debugdist.json
+++ b/myGym/configs/debugdist.json
@@ -37,11 +37,11 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"ppo",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
-"steps" :5120,
+"steps" :50020,
 "pretrained_model" :null,
 "multiprocessing" :false,
 #Evaluation
diff --git a/myGym/configs/pnr_easy.json b/myGym/configs/pnr_easy.json
index bbee1905..64c3a88c 100644
--- a/myGym/configs/pnr_easy.json
+++ b/myGym/configs/pnr_easy.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/pnr_hard.json b/myGym/configs/pnr_hard.json
index 78a1cc6b..22c7f2c0 100644
--- a/myGym/configs/pnr_hard.json
+++ b/myGym/configs/pnr_hard.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/pnr_medium.json b/myGym/configs/pnr_medium.json
index 06d687e7..1e610b8c 100644
--- a/myGym/configs/pnr_medium.json
+++ b/myGym/configs/pnr_medium.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/pnr_medium_multi2.json b/myGym/configs/pnr_medium_multi2.json
index c40c0acf..6d75db2b 100644
--- a/myGym/configs/pnr_medium_multi2.json
+++ b/myGym/configs/pnr_medium_multi2.json
@@ -39,7 +39,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :1024,
 "algo_steps" :1024,
diff --git a/myGym/configs/train_pnp_3n.json b/myGym/configs/train_pnp_3n.json
index dfcfdef1..76f18224 100644
--- a/myGym/configs/train_pnp_3n.json
+++ b/myGym/configs/train_pnp_3n.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/train_pnp_3n_debug.json b/myGym/configs/train_pnp_3n_debug.json
index 77845566..08836323 100644
--- a/myGym/configs/train_pnp_3n_debug.json
+++ b/myGym/configs/train_pnp_3n_debug.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/train_pnp_3nrot.json b/myGym/configs/train_pnp_3nrot.json
index 92075b10..079a251b 100644
--- a/myGym/configs/train_pnp_3nrot.json
+++ b/myGym/configs/train_pnp_3nrot.json
@@ -37,7 +37,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :3,
 "max_episode_steps" :512,
 "algo_steps" :512,
diff --git a/myGym/configs/train_pnp_4n_multitask2.json b/myGym/configs/train_pnp_4n_multitask2.json
index dbcd02de..6502302c 100644
--- a/myGym/configs/train_pnp_4n_multitask2.json
+++ b/myGym/configs/train_pnp_4n_multitask2.json
@@ -39,7 +39,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :4,
 "max_episode_steps" :1024,
 "algo_steps" :1024,
diff --git a/myGym/configs/train_pnp_4n_multitask3.json b/myGym/configs/train_pnp_4n_multitask3.json
index aa3156b0..62d01b41 100644
--- a/myGym/configs/train_pnp_4n_multitask3.json
+++ b/myGym/configs/train_pnp_4n_multitask3.json
@@ -41,7 +41,7 @@
 "yolact_config" :null,
 #Train
 "train_framework" :"tensorflow",
-"algo" :"multi",
+"algo" :"multippo2",
 "num_networks" :4,
 "max_episode_steps" :1024,
 "algo_steps" :1024,
diff --git a/myGym/stable_baselines_mygym/multi_ppo2.py b/myGym/stable_baselines_mygym/multi_ppo2.py
index d4dbf13a..449d0b9f 100644
--- a/myGym/stable_baselines_mygym/multi_ppo2.py
+++ b/myGym/stable_baselines_mygym/multi_ppo2.py
@@ -273,7 +273,7 @@ def learn(self, total_timesteps, callback=None, log_interval=1, tb_log_name="Dua
 
                 if self.verbose >= 1 and (update % log_interval == 0 or update == 1):
                     explained_var = explained_variance(values, returns)
-                    #logger.logkv("Steps", steps_used)
+                    logger.logkv("Steps", steps_used)
                     logger.dumpkvs()
             i+=1
 
diff --git a/myGym/test.py b/myGym/test.py
index a613e1a4..1c5a3ce8 100644
--- a/myGym/test.py
+++ b/myGym/test.py
@@ -351,7 +351,7 @@ def test_model(env, model=None, implemented_combos=None, arg_dict=None, model_lo
         test_env(env, arg_dict)
     else:
         try:
-            if arg_dict["algo"] == "multi":
+            if "multi" in arg_dict["algo"]:
                 model_args = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][1]
                 model = implemented_combos[arg_dict["algo"]][arg_dict["train_framework"]][0].load(arg_dict["model_path"], env=model_args[1].env)
             else:
diff --git a/myGym/train.py b/myGym/train.py
index b7707e18..cd017f19 100644
--- a/myGym/train.py
+++ b/myGym/train.py
@@ -117,7 +117,7 @@ def configure_implemented_combos(env, model_logdir, arg_dict):
                          "torchppo": {"tensorflow": [TorchPPO, (TorchMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]},
                          "myalgo": {"tensorflow": [MyAlgo, (MyMlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]},
                          "ref": {"tensorflow": [REFER, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"], "verbose": 1, "tensorboard_log": model_logdir}]},
-                         "multi": {"tensorflow": [MultiPPO2, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]},
+                         "multippo2": {"tensorflow": [MultiPPO2, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]},
                          "multiacktr": {"tensorflow": [MultiACKTR, (MlpPolicy, env), {"n_steps": arg_dict["algo_steps"],"n_models": arg_dict["num_networks"], "verbose": 1, "tensorboard_log": model_logdir}]}}
 
     if "PPO_P" in sys.modules:
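For reference, a minimal stand-alone sketch (not part of the patch) of how the renamed registry key interacts with the substring dispatch in test.py: both multi-network entries built by configure_implemented_combos in train.py ("multippo2" and "multiacktr") take the multi-network load branch, while single-network algorithms such as "ref" fall through to the default branch. The dictionary values below are placeholders standing in for the real model class, policy/env tuple, and keyword arguments.

# Illustrative sketch only; the real registry is built by
# configure_implemented_combos(env, model_logdir, arg_dict) in train.py.
implemented_combos = {
    "multippo2":  {"tensorflow": ["MultiPPO2",  ("MlpPolicy", "env"), {"n_models": 3}]},
    "multiacktr": {"tensorflow": ["MultiACKTR", ("MlpPolicy", "env"), {"n_models": 3}]},
    "ref":        {"tensorflow": ["REFER",      ("MlpPolicy", "env"), {}]},
}

for algo in implemented_combos:
    if "multi" in algo:
        # test.py takes this branch for every multi-network algorithm,
        # not just the algorithm formerly registered as "multi".
        print(algo, "-> multi-network load path")
    else:
        print(algo, "-> single-network load path")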