style(unity): change 'group'-related naming to 'behavior'-related naming
StepNeverStop committed Dec 31, 2020
1 parent e7e659c commit edb7f1f
Showing 11 changed files with 154 additions and 156 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -246,7 +246,7 @@ If you specify **gym**, **unity**, and **environment executable file path** simu

1. log, model, training parameter configuration, and data are stored in `C:\RLData` for Windows, or `$HOME/RLData` for Linux/OSX
2. you may need to use `su` or `sudo` to run on Linux/OSX
3. record directory format is `RLData/Environment/Algorithm/Group name(for ml-agents)/Training name/config&log&model`
3. record directory format is `RLData/Environment/Algorithm/Behavior name(for ml-agents)/Training name/config&log&model`
4. make sure the number of brains is > 1 if specifying `ma*` algorithms like maddpg
5. multi-agent algorithms don't support visual input or PER for now
6. **need 3 steps to implement a new algorithm**
@@ -256,7 +256,7 @@ If you specify **gym**, **unity**, and **environment executable file path** simu
7. set algorithms' hyper-parameters in [rls/algos/config.yaml](https://github.com/StepNeverStop/RLs/blob/master/rls/algos/config.yaml)
8. set training default configuration in [config.yaml](https://github.com/StepNeverStop/RLs/blob/master/config.yaml)
9. change neural network structure in [rls/nn/models.py](https://github.com/StepNeverStop/RLs/blob/master/rls/nn/models.py)
10. MADDPG is only suitable for Unity3D ML-Agents for now. group name in training scene should be set like `{agents control nums of this group per environment copy}#{group_name}`, i.e. `2#3DBallAgents` means one group/team controls two same agents in one environment copy.
10. MADDPG is only suitable for Unity3D ML-Agents for now. The behavior name in the training scene should be set like `{number of agents controlled by this behavior per environment copy}#{behavior_name}`, e.g. `2#3DBallAgents` means one group/team controls two identical agents in one environment copy.

## Ongoing things

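Note 10 above relies on the `{N}#{behavior_name}` convention to encode how many agents one behavior controls per environment copy. A minimal sketch of how such a name could be split apart — `parse_behavior_name` is a hypothetical helper for illustration, not a function from this repository:

```python
# Hypothetical helper (not part of RLs): split a behavior name such as
# "2#3DBallAgents" into (agents controlled per environment copy, plain name).
def parse_behavior_name(name: str):
    if '#' in name:
        count, _, plain_name = name.partition('#')
        return int(count), plain_name
    # No '#' prefix: assume a single controlled agent per copy.
    return 1, name


print(parse_behavior_name('2#3DBallAgents'))  # -> (2, '3DBallAgents')
print(parse_behavior_name('3DBall'))          # -> (1, '3DBall')
```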
5 changes: 4 additions & 1 deletion config.yaml
@@ -23,7 +23,10 @@ unity:
real_done: true
pre_fill_steps: 10000 # pre_fill_steps should be set to an integer multiple of '--copy' to get an accurate pre-fill number
env:
file_path: *env
file_name: *env
worker_id: 0
timeout_wait: 60

width: 84
height: 84
quality_level: 5
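The renamed `file_name` key and the new `worker_id`/`timeout_wait` entries mirror the constructor arguments of `mlagents_envs`' `UnityEnvironment`. A minimal sketch of that mapping, with placeholder values (the wiring shown here is illustrative, not the repository's own loader):

```python
# Sketch only: how the env keys map onto the ML-Agents Python API.
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(
    file_name='path/to/3DBall',  # placeholder; config.yaml fills this via the *env alias
    worker_id=0,                 # offsets the communication port so parallel workers don't clash
    timeout_wait=60,             # seconds to wait for the Unity executable to connect
)
env.reset()
env.close()
```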
12 changes: 6 additions & 6 deletions rls/algos/base/ma_policy.py
@@ -22,12 +22,12 @@
class MultiAgentPolicy(Base):
def __init__(self, envspec: MultiAgentEnvArgs, **kwargs):
super().__init__(**kwargs)
self.group_controls = envspec.group_controls
self.s_dim = count_repeats(envspec.s_dim, self.group_controls)
self.visual_sources = count_repeats(envspec.visual_sources, self.group_controls) # not use yet
self.behavior_controls = envspec.behavior_controls
self.s_dim = count_repeats(envspec.s_dim, self.behavior_controls)
self.visual_sources = count_repeats(envspec.visual_sources, self.behavior_controls) # not use yet
# self.visual_resolutions = envspec.visual_resolutions
self.a_dim = count_repeats(envspec.a_dim, self.group_controls)
self.is_continuous = count_repeats(envspec.is_continuous, self.group_controls)
self.a_dim = count_repeats(envspec.a_dim, self.behavior_controls)
self.is_continuous = count_repeats(envspec.is_continuous, self.behavior_controls)
self.n_agents = envspec.n_agents
if not self.n_agents:
raise ValueError('agents num is None.')
@@ -39,7 +39,7 @@ def __init__(self, envspec: MultiAgentEnvArgs, **kwargs):
self.max_train_step = int(kwargs.get('max_train_step', 1000))
self.delay_lr = bool(kwargs.get('decay_lr', True))

self.agent_sep_ctls = sum(self.group_controls)
self.agent_sep_ctls = sum(self.behavior_controls)
self.writers = [self._create_writer(self.log_dir + f'_{i}') for i in range(self.agent_sep_ctls)]

def init_lr(self, lr: float) -> Callable:
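The repeated `count_repeats(..., self.behavior_controls)` calls above suggest that every per-behavior spec entry is duplicated once for each agent that behavior controls, so later code can index specs per agent rather than per behavior. A standalone illustration of that assumed semantics (the real `count_repeats` lives in the repository's utilities and may differ):

```python
# Assumed semantics of count_repeats: repeat each per-behavior value by the
# number of agents that the behavior controls, yielding one entry per agent.
def count_repeats(values, behavior_controls):
    out = []
    for value, n_agents in zip(values, behavior_controls):
        out.extend([value] * n_agents)
    return out

# Two behaviors: the first controls 2 agents per environment copy, the second 1.
s_dim = [8, 4]
behavior_controls = [2, 1]
print(count_repeats(s_dim, behavior_controls))  # -> [8, 8, 4]
```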
47 changes: 20 additions & 27 deletions rls/common/train/unity.py
@@ -43,23 +43,16 @@ def unity_train(env, model,
save_frequency: how often to save checkpoints.
max_step_per_episode: maximum number of steps for an episode.
resampling_interval: how often to resample parameters for env reset.
Variables:
group_names: a list of group names set in Unity.
state: store a list of states for each group. each item contain a list of states for each agents that controlled by the same group.
visual_state: store a list of visual state information for each group.
action: store a list of actions for each group.
dones_flag: store a list of 'done' for each group. use for judge whether an episode is finished for every agents.
rewards: use to record rewards of agents for each group.
"""

sma = SMA(moving_average_episode)
frame_step = begin_frame_step
train_step = begin_train_step
n = env.group_agents[env.first_gn]
n = env.behavior_agents[env.first_bn]

for episode in range(begin_episode, max_train_episode):
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
s = ret.corrected_vector
visual_s = ret.corrected_visual
dones_flag = np.zeros(n, dtype=float)
@@ -70,7 +63,7 @@
while True:
step += 1
action = model.choose_action(s=s, visual_s=visual_s)
ret = env.step({env.first_gn: action})[env.first_gn]
ret = env.step({env.first_bn: action})[env.first_bn]

model.store_data(
s=s,
@@ -124,7 +117,7 @@
**sma.rs
)
print_func(f'Eps {episode:3d} | S {step:4d} | LDS {last_done_step:4d}', out_time=True)
print_func(f'{env.first_gn} R: {arrprint(rewards, 2)}')
print_func(f'{env.first_bn} R: {arrprint(rewards, 2)}')

if add_noise2buffer and episode % add_noise2buffer_episode_interval == 0:
unity_no_op(env, model, pre_fill_steps=add_noise2buffer_steps, prefill_choose=False, real_done=real_done,
@@ -141,21 +134,21 @@ def unity_no_op(env, model,
Make sure steps is greater than n-step if using any n-step ReplayBuffer.
'''
assert isinstance(pre_fill_steps, int) and pre_fill_steps >= 0, 'no_op.steps must have type of int and larger than/equal 0'
n = env.group_agents[env.first_gn]
n = env.behavior_agents[env.first_bn]

if pre_fill_steps == 0:
return
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
s = ret.corrected_vector
visual_s = ret.corrected_visual

for _ in trange(0, pre_fill_steps, n, unit_scale=n, ncols=80, desc=desc, bar_format=bar_format):
if prefill_choose:
action = model.choose_action(s=s, visual_s=visual_s)
else:
action = env.random_action()[env.first_gn]
ret = env.step({env.first_gn: action})[env.first_gn]
action = env.random_action()[env.first_bn]
ret = env.step({env.first_bn: action})[env.first_bn]
model.no_op_store(
s=s,
visual_s=visual_s,
@@ -178,13 +171,13 @@ def unity_inference(env, model,

for episode in range(episodes):
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
while True:
action = model.choose_action(s=ret.corrected_vector,
visual_s=ret.corrected_visual,
evaluation=True)
model.partial_reset(ret.done)
ret = env.step({env.first_gn: action})[env.first_gn]
ret = env.step({env.first_bn: action})[env.first_bn]


def ma_unity_no_op(env, model,
@@ -197,8 +190,8 @@ def ma_unity_no_op(env, model,
if pre_fill_steps == 0:
return

data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
model.reset()

# [s(s_brain1(agent1, agent2, ...), s_brain2, ...), visual_s, r, done, info]
Expand All @@ -210,7 +203,7 @@ def ma_unity_no_op(env, model,
if prefill_choose:
action = model.choose_action(s=s, visual_s=visual_s) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
else:
actions = env.random_action()
action = list(actions.values())
@@ -254,9 +247,9 @@ def ma_unity_train(env, model,
frame_step = begin_frame_step
train_step = begin_train_step

data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
agents_num_per_copy = sum(env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
agents_num_per_copy = sum(env.behavior_controls)

sma = [SMA(moving_average_episode) for _ in range(agents_num_per_copy)]

@@ -274,7 +267,7 @@
while True:
action = model.choose_action(s=s, visual_s=visual_s) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
s_, visual_s_, r, done, info, corrected_s_, corrected_visual_s_ = env.step(actions) # [Brains, Agents, Dims]
step += 1

@@ -344,13 +337,13 @@ def ma_unity_inference(env, model,
"""
inference mode. the algorithm model will not be trained, only used to show agents' behavior
"""
data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
for episode in range(episodes):
model.reset()
s, visual_s, _, _, _, _, _ = env.reset()
while True:
action = model.choose_action(s=s, visual_s=visual_s, evaluation=True) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
_, _, _, _, _, s, visual_s_ = env.step(actions)
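Throughout this file, environment I/O is keyed by behavior name: actions are passed in as a `{behavior_name: batch_of_actions}` dict and the result is looked up with the same key (`env.step({env.first_bn: action})[env.first_bn]`). A toy sketch of that calling convention with a stand-in wrapper — `FakeUnityWrapper` and its fields are illustrative only:

```python
import numpy as np

# Illustrative stand-in for the wrapped Unity environment: step() takes and
# returns data keyed by behavior name, as in the training loops above.
class FakeUnityWrapper:
    behavior_names = ['3DBall', 'Pusher']
    first_bn = behavior_names[0]

    def step(self, actions_by_behavior):
        # Echo a dummy per-behavior result with the same keys.
        return {name: {'reward': np.zeros(len(a))}
                for name, a in actions_by_behavior.items()}

env = FakeUnityWrapper()
action = np.zeros((4, 2))  # 4 agents in the first behavior, 2-dim continuous action
ret = env.step({env.first_bn: action})[env.first_bn]
print(ret['reward'].shape)  # -> (4,)
```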
8 changes: 4 additions & 4 deletions rls/common/trainer.py
@@ -162,12 +162,12 @@ def initialize_gym(self):

def initialize_unity(self):
# single agent with unity
self.train_args.base_dir = os.path.join(self.train_args.base_dir, self.env.first_fgn)
self.train_args.base_dir = os.path.join(self.train_args.base_dir, self.env.first_fbn)
if self.train_args.load_model_path is not None:
self.train_args.load_model_path = os.path.join(self.train_args.load_model_path, self.env.first_fgn)
self.train_args.load_model_path = os.path.join(self.train_args.load_model_path, self.env.first_fbn)

if 'Nstep' in self.buffer_args['type'] or 'Episode' in self.buffer_args['type']:
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env.group_agents[self.env.first_gn]
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env.behavior_agents[self.env.first_bn]
buffer = get_buffer(self.buffer_args)

self.algo_args.update({
@@ -195,7 +195,7 @@ def initialize_unity(self):

def initialize_multi_unity(self):
# multi agents with unity
assert self.env.group_num > 1, 'if using ma* algorithms, number of brains must larger than 1'
assert self.env.behavior_num > 1, 'if using ma* algorithms, number of brains must larger than 1'

if 'Nstep' in self.buffer_args['type'] or 'Episode' in self.buffer_args['type']:
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env_args['env_num']
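`initialize_unity` appends the first (full) behavior name to `base_dir`, which is how the `RLData/Environment/Algorithm/Behavior name/Training name` layout described in the README comes about. A small sketch of that path construction with assumed example values (the real pieces come from the CLI arguments and the behavior configured in the Unity scene):

```python
import os

# Example values only, for illustrating the directory layout.
record_root = os.path.expanduser('~/RLData')              # $HOME/RLData on Linux/OSX
base_dir = os.path.join(record_root, '3DBall', 'maddpg')  # Environment/Algorithm
first_fbn = '3DBallAgents'                                # full behavior name
training_name = 'run-20201231'

base_dir = os.path.join(base_dir, first_fbn, training_name)
print(base_dir)  # e.g. /home/user/RLData/3DBall/maddpg/3DBallAgents/run-20201231
```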