perf: remove package docopt and use argparse instead (#45, #39)
1. removed `docopt`
2. added `argparse`
3. updated README
4. removed redundant functions
5. optimized `run.py`
6. added configuration files in the `configs` folder
7. optimized code style
StepNeverStop committed Jul 27, 2021
1 parent d65bc96 commit 5bbd765
Showing 31 changed files with 398 additions and 560 deletions.
84 changes: 46 additions & 38 deletions README.md
@@ -177,41 +177,52 @@ For now, these algorithms are available:

```python
"""
Usage:
python [options]
Options:
-h,--help show help info
-a,--algorithm=<name> specify the training algorithm [default: ppo]
-c,--copys=<n> nums of environment copys that collect data in parallel [default: 1]
-d, --device=<str> specify the device that operate Torch.Tensor [default: None]
-e, --env=<name> specify the environment name [default: CartPole-v0]
-f,--file-name=<file> specify the path of builded training environment of UNITY3D [default: None]
-g,--graphic whether show graphic interface when using UNITY3D [default: False]
-i,--inference inference the trained model, not train policies [default: False]
-p,--platform=<str> specify the platform of training environment [default: gym]
-l,--load=<name> specify the name of pre-trained model that need to load [default: None]
-m,--models=<n> specify the number of trails that using different random seeds [default: 1]
-n,--name=<name> specify the name of this training task [default: None]
-r,--rnn whether use rnn[GRU, LSTM, ...] or not [default: False]
-s,--save-frequency=<n> specify the interval that saving model checkpoint [default: None]
-t,--train-step=<n> specify the training step that optimize the policy model [default: None]
-u,--unity whether training with UNITY3D editor [default: False]
--port=<n> specify the port that communicate with training environment of UNITY3D [default: 5005]
--apex=<str> i.e. "learner"/"worker"/"buffer"/"evaluator" [default: None]
--config-file=<file> specify the path of training configuration file [default: None]
--store-dir=<file> specify the directory that store model, log and others [default: None]
--seed=<n> specify the random seed of module random, numpy and pytorch [default: 42]
--env-seed=<n> specify the environment random seed [default: 42]
--max-step=<n> specify the maximum step per episode [default: None]
--train-episode=<n> specify the training maximum episode [default: None]
--train-frame=<n> specify the training maximum steps interacting with environment [default: None]
--prefill-steps=<n> specify the number of experiences that should be collected before start training, use for off-policy algorithms [default: None]
--prefill-choose whether choose action using model or choose randomly [default: False]
--render-episode=<n> specify when to render the graphic interface of gym environment [default: None]
--info=<str> write another information that describe this training task [default: None]
--hostname whether concatenate hostname with the training name [default: False]
--no-save specify whether save models/logs/summaries while training or not [default: False]
usage: run.py [-h] [-c COPYS] [--seed SEED] [-r] [-p {gym,unity}]
[-a {pg,trpo,ppo,a2c,cem,aoc,ppoc,qs,ac,dpg,ddpg,pd_ddpg,td3,sac_v,sac,tac,dqn,ddqn,dddqn,averaged_dqn,c51,qrdqn,rainbow,iqn,maxsqn,sql,bootstrappeddqn,curl,oc,ioc,hiro,maddpg,vdn,iql}]
[-d DEVICE] [-i] [-l LOAD_PATH] [-m MODELS] [-n NAME] [-s SAVE_FREQUENCY] [--apex {learner,worker,buffer,evaluator}] [--config-file CONFIG_FILE]
[--store-dir STORE_DIR] [--episode-length EPISODE_LENGTH] [--prefill-steps PREFILL_STEPS] [--prefill-choose] [--hostname] [--no-save] [--info INFO]
[-e ENV] [-f FILE_NAME]
optional arguments:
-h, --help show this help message and exit
-c COPYS, --copys COPYS
nums of environment copys that collect data in parallel
--seed SEED specify the random seed of module random, numpy and pytorch
-r, --render whether render game interface
-p {gym,unity}, --platform {gym,unity}
specify the platform of training environment
-a {pg,trpo,ppo,a2c,cem,aoc,ppoc,qs,ac,dpg,ddpg,pd_ddpg,td3,sac_v,sac,tac,dqn,ddqn,dddqn,averaged_dqn,c51,qrdqn,rainbow,iqn,maxsqn,sql,bootstrappeddqn,curl,oc,ioc,hiro,maddpg,vdn,iql}, --algorithm {pg,trpo,ppo,a2c,cem,aoc,ppoc,qs,ac,dpg,ddpg,pd_ddpg,td3,sac_v,sac,tac,dqn,ddqn,dddqn,averaged_dqn,c51,qrdqn,rainbow,iqn,maxsqn,sql,bootstrappeddqn,curl,oc,ioc,hiro,maddpg,vdn,iql}
specify the training algorithm
-d DEVICE, --device DEVICE
specify the device that operate Torch.Tensor
-i, --inference inference the trained model, not train policies
-l LOAD_PATH, --load-path LOAD_PATH
specify the name of pre-trained model that need to load
-m MODELS, --models MODELS
specify the number of trails that using different random seeds
-n NAME, --name NAME specify the name of this training task
-s SAVE_FREQUENCY, --save-frequency SAVE_FREQUENCY
specify the interval that saving model checkpoint
--apex {learner,worker,buffer,evaluator}
--config-file CONFIG_FILE
specify the path of training configuration file
--store-dir STORE_DIR
specify the directory that store model, log and others
--episode-length EPISODE_LENGTH
specify the maximum step per episode
--prefill-steps PREFILL_STEPS
specify the number of experiences that should be collected before start training, use for off-policy algorithms
--prefill-choose whether choose action using model or choose randomly
--hostname whether concatenate hostname with the training name
--no-save specify whether save models/logs/summaries while training or not
--info INFO write another information that describe this training task
-e ENV, --env ENV specify the environment name
-f FILE_NAME, --file-name FILE_NAME
specify the path of builded training environment of UNITY3D
```
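For reference, here is a minimal sketch of how a parser producing a usage string like the one above could be built with `argparse`. It is illustrative only: it covers an abridged subset of the flags, and the defaults are taken from the old docopt block rather than from the actual `run.py`.

```python
# Minimal, illustrative argparse setup (not the exact parser used in run.py).
import argparse


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog='run.py')
    parser.add_argument('-c', '--copys', type=int, default=1,
                        help='nums of environment copys that collect data in parallel')
    parser.add_argument('--seed', type=int, default=42,
                        help='specify the random seed of module random, numpy and pytorch')
    parser.add_argument('-r', '--render', action='store_true',
                        help='whether render game interface')
    parser.add_argument('-p', '--platform', choices=['gym', 'unity'], default='gym',
                        help='specify the platform of training environment')
    parser.add_argument('-a', '--algorithm', default='ppo',
                        help='specify the training algorithm')
    parser.add_argument('-e', '--env', default='CartPole-v0',
                        help='specify the environment name')
    parser.add_argument('--no-save', action='store_true',
                        help='whether save models/logs/summaries while training or not')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()
    print(args)  # e.g. Namespace(copys=12, seed=42, render=False, platform='gym', ...)
```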
```python
"""
Example:
python run.py
python run.py -p gym -a dqn -e CartPole-v0 -c 12 -n dqn_cartpole --no-save
@@ -220,8 +231,6 @@
"""
```
If you specify **gym**, **unity**, and an **environment executable file path** simultaneously, the following priority order applies: gym > unity > unity_env.
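A rough sketch of that priority rule is shown below; the helper name and return labels are hypothetical and not taken from the repository.

```python
# Hypothetical helper illustrating the gym > unity > unity_env priority.
from typing import Optional


def resolve_backend(platform: str, file_name: Optional[str]) -> str:
    if platform == 'gym':
        return 'gym'        # gym always wins when requested
    if file_name is None:
        return 'unity'      # Unity editor, since no built executable was given
    return 'unity_env'      # built Unity executable at file_name
```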

## Notes
1. logs, models, training parameter configuration, and data are stored in `C:\RLData` on Windows, or `$HOME/RLData` on Linux/OSX
@@ -236,7 +245,6 @@
7. set algorithms' hyper-parameters in [rls/configs/algorithms.yaml](https://github.com/StepNeverStop/RLs/blob/master/rls/configs/algorithms.yaml)
8. set training default configuration in [config.yaml](https://github.com/StepNeverStop/RLs/blob/master/config.yaml)
9. change neural network structure in [rls/nn/models.py](https://github.com/StepNeverStop/RLs/blob/master/rls/nn/models.py)
10. MADDPG is only suitable for Unity3D ML-Agents for now.
## Ongoing things
77 changes: 0 additions & 77 deletions config.yaml

This file was deleted.

1 change: 0 additions & 1 deletion environment.yaml
@@ -26,7 +26,6 @@ dependencies:
- cloudpickle==1.2.2
- decorator==4.4.2
- dm-tree==0.1.5
- docopt==0.6.2
- future==0.18.2
- gast==0.3.3
- google-auth==1.23.0
1 change: 0 additions & 1 deletion requirements.txt
@@ -1,4 +1,3 @@
docopt
numpy
pyyaml
tqdm
11 changes: 3 additions & 8 deletions rls/algos/base/base.py
@@ -33,19 +33,14 @@ def __init__(self, *args, **kwargs):
super().__init__()
self.no_save = bool(kwargs.get('no_save', False))
self.base_dir = base_dir = kwargs.get('base_dir')
self.device = kwargs.get('device', None) or ("cuda" if t.cuda.is_available() else "cpu")
self.device = kwargs.get('device', 'cpu')
logger.info(colorize(f"PyTorch Tensor Device: {self.device}"))

self.cp_dir, self.log_dir, self.excel_dir = [os.path.join(base_dir, i) for i in ['model', 'log', 'excel']]

if not self.no_save:
check_or_create(self.cp_dir, 'checkpoints(models)')

if 1 == 0: # Not used
import pandas as pd
check_or_create(self.excel_dir, 'excel')
self.excel_writer = pd.ExcelWriter(self.excel_dir + '/data.xlsx')

self.global_step = t.tensor(0, dtype=t.int64)
self._worker_modules = {}
self._trainer_modules = {'global_step': self.global_step}
@@ -81,9 +76,9 @@ def resume(self, base_dir: Optional[str] = None) -> NoReturn:
logger.error(e)
raise Exception(colorize(f'Resume model from {ckpt_path} FAILED.', color='red'))
else:
logger.info(colorize(f'Resume model from {ckpt_path} SUCCUESS.', color='green'))
logger.info(colorize(f'Resume model from {ckpt_path} SUCCESSFULLY.', color='green'))
else:
logger.info(colorize('Initialize model SUCCUESS.', color='green'))
logger.info(colorize('Initialize model SUCCESSFULLY.', color='green'))

def save(self, **kwargs) -> NoReturn:
"""
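Note that with the change in `base.py` above, the base class no longer auto-detects CUDA: the device falls back to `'cpu'` unless a device string reaches it through `kwargs`. A caller-side sketch of restoring the old auto-selection, assuming the device string is passed through unchanged (the constructor call is hypothetical):

```python
# Illustrative caller-side device selection; auto-detection now has to
# happen before kwargs reach the algorithm base class.
import torch as t

device = 'cuda' if t.cuda.is_available() else 'cpu'
# algo = SomeAlgorithm(device=device, base_dir='./RLData/run', no_save=False)  # hypothetical
```

Equivalently, the device can be chosen on the command line via the `-d/--device` flag, e.g. `python run.py -a dqn -e CartPole-v0 -d cuda`.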
4 changes: 2 additions & 2 deletions rls/algos/base/ma_off_policy.py
@@ -12,7 +12,7 @@
NoReturn)

from rls.algos.base.ma_policy import MultiAgentPolicy
from rls.common.yaml_ops import load_yaml
from rls.common.yaml_ops import load_config
from rls.memories.multi_replay_buffers import MultiAgentExperienceReplay
from rls.common.specs import BatchExperiences

@@ -63,7 +63,7 @@ def initialize_data_buffer(self) -> NoReturn:
)
self.gamma = self.gamma ** self.n_step

default_buffer_args = load_yaml(f'rls/configs/off_policy_buffer.yaml')['MultiAgentExperienceReplay'][_type]
default_buffer_args = load_config(f'rls/configs/off_policy_buffer.yaml')['MultiAgentExperienceReplay'][_type]
default_buffer_args.update(_buffer_args)

self.data = MultiAgentExperienceReplay(n_agents=self.n_agents_percopy,
4 changes: 2 additions & 2 deletions rls/algos/base/off_policy.py
@@ -13,7 +13,7 @@

from rls.utils.np_utils import int2one_hot
from rls.algos.base.policy import Policy
from rls.common.yaml_ops import load_yaml
from rls.common.yaml_ops import load_config
from rls.common.specs import BatchExperiences


@@ -68,7 +68,7 @@ def initialize_data_buffer(self) -> NoReturn:
)
self.gamma = self.gamma ** self.n_step

default_buffer_args = load_yaml(f'rls/configs/off_policy_buffer.yaml')[_type]
default_buffer_args = load_config(f'rls/configs/off_policy_buffer.yaml')[_type]
default_buffer_args.update(_buffer_args)

Buffer = getattr(importlib.import_module(f'rls.memories.single_replay_buffers'), _type)
8 changes: 4 additions & 4 deletions rls/algos/register.py
@@ -9,7 +9,7 @@
Callable,
Dict)

from rls.common.yaml_ops import load_yaml
from rls.common.yaml_ops import load_config
from rls.utils.display import colorize
from rls.utils.logging_utils import get_logger

@@ -58,7 +58,7 @@ def get_model_info(name: str) -> Tuple[Callable, Dict, str, str]:
model = getattr(importlib.import_module(f'rls.algos.{policy_type}.{name}'), class_name)

algo_config = {}
algo_config.update(load_yaml(f'rls/configs/algorithms.yaml')['general'])
algo_config.update(load_yaml(f'rls/configs/algorithms.yaml')[policy_mode.replace('-', '_')])
algo_config.update(load_yaml(f'rls/configs/algorithms.yaml')[name])
algo_config.update(load_config(f'rls/configs/algorithms.yaml')['general'])
algo_config.update(load_config(f'rls/configs/algorithms.yaml')[policy_mode.replace('-', '_')])
algo_config.update(load_config(f'rls/configs/algorithms.yaml')[name])
return model, algo_config, policy_mode, policy_type
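The three `load_config(...)` calls above layer the configuration so that later `update` calls win on key collisions: algorithm-specific settings override policy-mode settings, which override the `general` section. A minimal illustration of that precedence, using made-up keys:

```python
# Precedence illustration: later dict.update() calls win on key collisions.
general = {'gamma': 0.99, 'lr': 1e-3}
policy_mode = {'lr': 5e-4}
algo = {'lr': 3e-4, 'clip_range': 0.2}

config = {}
config.update(general)      # gamma=0.99, lr=1e-3
config.update(policy_mode)  # lr overridden -> 5e-4
config.update(algo)         # lr overridden -> 3e-4, clip_range added
assert config == {'gamma': 0.99, 'lr': 3e-4, 'clip_range': 0.2}
```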
2 changes: 1 addition & 1 deletion rls/algos/single/curl.py
@@ -263,7 +263,7 @@ def train(self, BATCH, isw, cell_states, visual, visual_, pos):
z_out = self.encoder_target(pos)
logits = z_a @ (self.curl_w @ z_out.T)
logits -= logits.max(-1, keepdim=True)[0]
curl_loss = t.nn.functional.cross_entropy(logits, t.arange(self.batch_size))
curl_loss = t.nn.functional.cross_entropy(logits, t.arange(logits.shape[0]))
self.curl_oplr.step(curl_loss)

feat = feat.detach()
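The fix above makes the contrastive targets track the actual batch: the InfoNCE labels must have one entry per row of `logits`, which matters whenever the sampled batch is smaller than the configured `batch_size`. A small, self-contained sketch of that loss, simplified relative to the full CURL implementation in `curl.py`:

```python
# Simplified InfoNCE-style contrastive loss as used in CURL-like setups.
import torch as t


def contrastive_loss(z_a: t.Tensor, z_pos: t.Tensor, W: t.Tensor) -> t.Tensor:
    logits = z_a @ (W @ z_pos.T)                       # (N, N) similarity matrix
    logits = logits - logits.max(-1, keepdim=True)[0]  # numerical stability
    labels = t.arange(logits.shape[0])                 # diagonal entries are the positives
    return t.nn.functional.cross_entropy(logits, labels)


# Works even when the final batch has fewer rows than the configured batch size.
z_a, z_pos, W = t.randn(5, 8), t.randn(5, 8), t.randn(8, 8)
loss = contrastive_loss(z_a, z_pos, W)
```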