style(unity): change 'group'-related naming to 'behavior'-related naming
StepNeverStop committed Dec 31, 2020
1 parent e7e659c commit edb7f1f
Showing 11 changed files with 154 additions and 156 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -246,7 +246,7 @@ If you specify **gym**, **unity**, and **environment executable file path** simu

1. log, model, training parameter configuration, and data are stored in `C:\RLData` for Windows, or `$HOME/RLData` for Linux/OSX
2. you may need to use `su` or `sudo` to run on Linux/OSX
3. record directory format is `RLData/Environment/Algorithm/Group name(for ml-agents)/Training name/config&log&model`
3. record directory format is `RLData/Environment/Algorithm/Behavior name(for ml-agents)/Training name/config&log&model`
4. make sure the number of brains is > 1 if specifying `ma*` algorithms like maddpg
5. multi-agent algorithms don't support visual input or PER for now
6. **need 3 steps to implement a new algorithm**
@@ -256,7 +256,7 @@ If you specify **gym**, **unity**, and **environment executable file path** simu
7. set algorithms' hyper-parameters in [rls/algos/config.yaml](https://github.com/StepNeverStop/RLs/blob/master/rls/algos/config.yaml)
8. set training default configuration in [config.yaml](https://github.com/StepNeverStop/RLs/blob/master/config.yaml)
9. change neural network structure in [rls/nn/models.py](https://github.com/StepNeverStop/RLs/blob/master/rls/nn/models.py)
10. MADDPG is only suitable for Unity3D ML-Agents for now. group name in training scene should be set like `{agents control nums of this group per environment copy}#{group_name}`, i.e. `2#3DBallAgents` means one group/team controls two same agents in one environment copy.
10. MADDPG is only suitable for Unity3D ML-Agents for now. The behavior name in the training scene should be set like `{number of agents controlled by this behavior per environment copy}#{behavior_name}`, e.g. `2#3DBallAgents` means one group/team controls two identical agents in one environment copy.

## Ongoing things

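Note 10 above relies on the `{N}#{behavior_name}` convention to encode how many agents one behavior controls per environment copy. A minimal sketch of how such a name could be split apart — `parse_behavior_name` is a hypothetical helper for illustration, not a function from this repository:

```python
# Hypothetical helper (not part of RLs): split a behavior name such as
# "2#3DBallAgents" into (agents controlled per environment copy, plain name).
def parse_behavior_name(name: str):
    if '#' in name:
        count, _, plain_name = name.partition('#')
        return int(count), plain_name
    # No '#' prefix: assume a single controlled agent per copy.
    return 1, name


print(parse_behavior_name('2#3DBallAgents'))  # -> (2, '3DBallAgents')
print(parse_behavior_name('3DBall'))          # -> (1, '3DBall')
```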
5 changes: 4 additions & 1 deletion config.yaml
@@ -23,7 +23,10 @@ unity:
real_done: true
pre_fill_steps: 10000 # pre_fill_steps should be set to an integer multiple of '--copy' to get an accurate pre-fill number
env:
file_path: *env
file_name: *env
worker_id: 0
timeout_wait: 60

width: 84
height: 84
quality_level: 5
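The renamed `file_name` key and the new `worker_id`/`timeout_wait` entries mirror the constructor arguments of `mlagents_envs`' `UnityEnvironment`. A minimal sketch of that mapping, with placeholder values (the wiring shown here is illustrative, not the repository's own loader):

```python
# Sketch only: how the env keys map onto the ML-Agents Python API.
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(
    file_name='path/to/3DBall',  # placeholder; config.yaml fills this via the *env alias
    worker_id=0,                 # offsets the communication port so parallel workers don't clash
    timeout_wait=60,             # seconds to wait for the Unity executable to connect
)
env.reset()
env.close()
```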
12 changes: 6 additions & 6 deletions rls/algos/base/ma_policy.py
@@ -22,12 +22,12 @@
class MultiAgentPolicy(Base):
def __init__(self, envspec: MultiAgentEnvArgs, **kwargs):
super().__init__(**kwargs)
self.group_controls = envspec.group_controls
self.s_dim = count_repeats(envspec.s_dim, self.group_controls)
self.visual_sources = count_repeats(envspec.visual_sources, self.group_controls) # not use yet
self.behavior_controls = envspec.behavior_controls
self.s_dim = count_repeats(envspec.s_dim, self.behavior_controls)
self.visual_sources = count_repeats(envspec.visual_sources, self.behavior_controls) # not use yet
# self.visual_resolutions = envspec.visual_resolutions
self.a_dim = count_repeats(envspec.a_dim, self.group_controls)
self.is_continuous = count_repeats(envspec.is_continuous, self.group_controls)
self.a_dim = count_repeats(envspec.a_dim, self.behavior_controls)
self.is_continuous = count_repeats(envspec.is_continuous, self.behavior_controls)
self.n_agents = envspec.n_agents
if not self.n_agents:
raise ValueError('agents num is None.')
@@ -39,7 +39,7 @@ def __init__(self, envspec: MultiAgentEnvArgs, **kwargs):
self.max_train_step = int(kwargs.get('max_train_step', 1000))
self.delay_lr = bool(kwargs.get('decay_lr', True))

self.agent_sep_ctls = sum(self.group_controls)
self.agent_sep_ctls = sum(self.behavior_controls)
self.writers = [self._create_writer(self.log_dir + f'_{i}') for i in range(self.agent_sep_ctls)]

def init_lr(self, lr: float) -> Callable:
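The repeated `count_repeats(..., self.behavior_controls)` calls above suggest that every per-behavior spec entry is duplicated once for each agent that behavior controls, so later code can index specs per agent rather than per behavior. A standalone illustration of that assumed semantics (the real `count_repeats` lives in the repository's utilities and may differ):

```python
# Assumed semantics of count_repeats: repeat each per-behavior value by the
# number of agents that the behavior controls, yielding one entry per agent.
def count_repeats(values, behavior_controls):
    out = []
    for value, n_agents in zip(values, behavior_controls):
        out.extend([value] * n_agents)
    return out

# Two behaviors: the first controls 2 agents per environment copy, the second 1.
s_dim = [8, 4]
behavior_controls = [2, 1]
print(count_repeats(s_dim, behavior_controls))  # -> [8, 8, 4]
```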
47 changes: 20 additions & 27 deletions rls/common/train/unity.py
@@ -43,23 +43,16 @@ def unity_train(env, model,
save_frequency: how often to save checkpoints.
max_step_per_episode: maximum number of steps for an episode.
resampling_interval: how often to resample parameters for env reset.
Variables:
group_names: a list of group names set in Unity.
state: store a list of states for each group. each item contain a list of states for each agents that controlled by the same group.
visual_state: store a list of visual state information for each group.
action: store a list of actions for each group.
dones_flag: store a list of 'done' for each group. use for judge whether an episode is finished for every agents.
rewards: use to record rewards of agents for each group.
"""

sma = SMA(moving_average_episode)
frame_step = begin_frame_step
train_step = begin_train_step
n = env.group_agents[env.first_gn]
n = env.behavior_agents[env.first_bn]

for episode in range(begin_episode, max_train_episode):
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
s = ret.corrected_vector
visual_s = ret.corrected_visual
dones_flag = np.zeros(n, dtype=float)
@@ -70,7 +63,7 @@
while True:
step += 1
action = model.choose_action(s=s, visual_s=visual_s)
ret = env.step({env.first_gn: action})[env.first_gn]
ret = env.step({env.first_bn: action})[env.first_bn]

model.store_data(
s=s,
@@ -124,7 +117,7 @@
**sma.rs
)
print_func(f'Eps {episode:3d} | S {step:4d} | LDS {last_done_step:4d}', out_time=True)
print_func(f'{env.first_gn} R: {arrprint(rewards, 2)}')
print_func(f'{env.first_bn} R: {arrprint(rewards, 2)}')

if add_noise2buffer and episode % add_noise2buffer_episode_interval == 0:
unity_no_op(env, model, pre_fill_steps=add_noise2buffer_steps, prefill_choose=False, real_done=real_done,
@@ -141,21 +134,21 @@ def unity_no_op(env, model,
Make sure steps is greater than n-step if using any n-step ReplayBuffer.
'''
assert isinstance(pre_fill_steps, int) and pre_fill_steps >= 0, 'no_op.steps must have type of int and larger than/equal 0'
n = env.group_agents[env.first_gn]
n = env.behavior_agents[env.first_bn]

if pre_fill_steps == 0:
return
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
s = ret.corrected_vector
visual_s = ret.corrected_visual

for _ in trange(0, pre_fill_steps, n, unit_scale=n, ncols=80, desc=desc, bar_format=bar_format):
if prefill_choose:
action = model.choose_action(s=s, visual_s=visual_s)
else:
action = env.random_action()[env.first_gn]
ret = env.step({env.first_gn: action})[env.first_gn]
action = env.random_action()[env.first_bn]
ret = env.step({env.first_bn: action})[env.first_bn]
model.no_op_store(
s=s,
visual_s=visual_s,
@@ -178,13 +171,13 @@ def unity_inference(env, model,

for episode in range(episodes):
model.reset()
ret = env.reset()[env.first_gn]
ret = env.reset()[env.first_bn]
while True:
action = model.choose_action(s=ret.corrected_vector,
visual_s=ret.corrected_visual,
evaluation=True)
model.partial_reset(ret.done)
ret = env.step({env.first_gn: action})[env.first_gn]
ret = env.step({env.first_bn: action})[env.first_bn]


def ma_unity_no_op(env, model,
@@ -197,8 +190,8 @@ def ma_unity_no_op(env, model,
if pre_fill_steps == 0:
return

data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
model.reset()

# [s(s_brain1(agent1, agent2, ...), s_brain2, ...), visual_s, r, done, info]
Expand All @@ -210,7 +203,7 @@ def ma_unity_no_op(env, model,
if prefill_choose:
action = model.choose_action(s=s, visual_s=visual_s) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
else:
actions = env.random_action()
action = list(actions.values())
@@ -254,9 +247,9 @@ def ma_unity_train(env, model,
frame_step = begin_frame_step
train_step = begin_train_step

data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
agents_num_per_copy = sum(env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
agents_num_per_copy = sum(env.behavior_controls)

sma = [SMA(moving_average_episode) for _ in range(agents_num_per_copy)]

@@ -274,7 +267,7 @@
while True:
action = model.choose_action(s=s, visual_s=visual_s) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
s_, visual_s_, r, done, info, corrected_s_, corrected_visual_s_ = env.step(actions) # [Brains, Agents, Dims]
step += 1

@@ -344,13 +337,13 @@ def ma_unity_inference(env, model,
"""
inference mode. the algorithm model will not be trained, only used to show agents' behavior
"""
data_change_func = multi_agents_data_preprocess(env.env_copys, env.group_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.group_controls)
data_change_func = multi_agents_data_preprocess(env.env_copys, env.behavior_controls)
action_reshape_func = multi_agents_action_reshape(env.env_copys, env.behavior_controls)
for episode in range(episodes):
model.reset()
s, visual_s, _, _, _, _, _ = env.reset()
while True:
action = model.choose_action(s=s, visual_s=visual_s, evaluation=True) # [total_agents, batch, dimension]
action = action_reshape_func(action)
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.group_names)}
actions = {f'{brain_name}': action[i] for i, brain_name in enumerate(env.behavior_names)}
_, _, _, _, _, s, visual_s_ = env.step(actions)
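Throughout this file, environment I/O is keyed by behavior name: actions are passed in as a `{behavior_name: batch_of_actions}` dict and the result is looked up with the same key (`env.step({env.first_bn: action})[env.first_bn]`). A toy sketch of that calling convention with a stand-in wrapper — `FakeUnityWrapper` and its fields are illustrative only:

```python
import numpy as np

# Illustrative stand-in for the wrapped Unity environment: step() takes and
# returns data keyed by behavior name, as in the training loops above.
class FakeUnityWrapper:
    behavior_names = ['3DBall', 'Pusher']
    first_bn = behavior_names[0]

    def step(self, actions_by_behavior):
        # Echo a dummy per-behavior result with the same keys.
        return {name: {'reward': np.zeros(len(a))}
                for name, a in actions_by_behavior.items()}

env = FakeUnityWrapper()
action = np.zeros((4, 2))  # 4 agents in the first behavior, 2-dim continuous action
ret = env.step({env.first_bn: action})[env.first_bn]
print(ret['reward'].shape)  # -> (4,)
```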
8 changes: 4 additions & 4 deletions rls/common/trainer.py
@@ -162,12 +162,12 @@ def initialize_gym(self):

def initialize_unity(self):
# single agent with unity
self.train_args.base_dir = os.path.join(self.train_args.base_dir, self.env.first_fgn)
self.train_args.base_dir = os.path.join(self.train_args.base_dir, self.env.first_fbn)
if self.train_args.load_model_path is not None:
self.train_args.load_model_path = os.path.join(self.train_args.load_model_path, self.env.first_fgn)
self.train_args.load_model_path = os.path.join(self.train_args.load_model_path, self.env.first_fbn)

if 'Nstep' in self.buffer_args['type'] or 'Episode' in self.buffer_args['type']:
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env.group_agents[self.env.first_gn]
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env.behavior_agents[self.env.first_bn]
buffer = get_buffer(self.buffer_args)

self.algo_args.update({
@@ -195,7 +195,7 @@ def initialize_unity(self):

def initialize_multi_unity(self):
# multi agents with unity
assert self.env.group_num > 1, 'if using ma* algorithms, number of brains must larger than 1'
assert self.env.behavior_num > 1, 'if using ma* algorithms, number of brains must larger than 1'

if 'Nstep' in self.buffer_args['type'] or 'Episode' in self.buffer_args['type']:
self.buffer_args[self.buffer_args['type']]['agents_num'] = self.env_args['env_num']
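`initialize_unity` appends the first (full) behavior name to `base_dir`, which is how the `RLData/Environment/Algorithm/Behavior name/Training name` layout described in the README comes about. A small sketch of that path construction with assumed example values (the real pieces come from the CLI arguments and the behavior configured in the Unity scene):

```python
import os

# Example values only, for illustrating the directory layout.
record_root = os.path.expanduser('~/RLData')              # $HOME/RLData on Linux/OSX
base_dir = os.path.join(record_root, '3DBall', 'maddpg')  # Environment/Algorithm
first_fbn = '3DBallAgents'                                # full behavior name
training_name = 'run-20201231'

base_dir = os.path.join(base_dir, first_fbn, training_name)
print(base_dir)  # e.g. /home/user/RLData/3DBall/maddpg/3DBallAgents/run-20201231
```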