Skip to content

Commit

Permalink
Speed profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
ASzot committed Jan 23, 2023
1 parent cf2ccb6 commit 1b9bd00
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 25 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
* `pip install -e habitat-lab`
* `pip install -e habitat-baselines`

# Running the Benchmark
From the `/home/aszot/habitat-lab/habitat-baselines/` directory, run:
```unix
MADRONA_MWGPU_KERNEL_CACHE=/home/aszot/madrona_gpu_tmp CUDA_VISIBLE_DEVICES=0 /home/aszot/miniconda3/envs/madrona2/bin/python ../scripts/time_runs.py
```
The timing results are then saved to `data/speeds.csv`, relative to the directory the script was run from.

# Repro Physics Bugs
From `habitat-lab/habitat-baselines/` run
```
Expand Down
54 changes: 30 additions & 24 deletions habitat-baselines/habitat_baselines/common/madrona.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,12 @@ def __init__(self, config):
max_entities_per_world=3,
render_width=64,
render_height=64,
lidar_render=True,
debug_compile=False,
)
self._action = self._sim.action_tensor().to_torch()
self._rgb = self._sim.rgb_tensor().to_torch()
# self._rgb = self._sim.rgb_tensor().to_torch()
self._lidar = self._sim.lidar_tensor().to_torch()
self._reset = self._sim.reset_tensor().to_torch()
self._reward = self._sim.reward_tensor().to_torch()
self._done = self._sim.done_tensor().to_torch()
Expand Down Expand Up @@ -143,6 +145,7 @@ def _get_obs_no_cp(self):
self._agents_vis * self._agent_data,
self._box_vis * self._box_data,
self._ramp_vis * self._ramp_data,
self._lidar,
]
dat = [x.view(self._num_envs, self._max_num_agents, -1) for x in dat]
dat.extend(
Expand Down Expand Up @@ -253,30 +256,33 @@ def step(self, action):
self._seeker_reward *= not_done_orig

reward = self._agent_batch(reward)
info = {
"box_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, :9], dim=-1)
),
"ramp_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, 9:11], dim=-1)
),
"agent_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, 11:], dim=-1)
),
"hider_r": self._hider_reward.view(-1, 3),
"seekr_r": self._seeker_reward.view(-1, 3),
}
info = {
k: v.mean(-1, keepdims=True)
.repeat(1, self._max_num_agents)
.view(-1, 1)
for k, v in info.items()
}
info.update(
{
"r_t": reward,
if self._config.habitat_baselines.speed_mode:
info = {}
else:
info = {
"box_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, :9], dim=-1)
),
"ramp_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, 9:11], dim=-1)
),
"agent_dist": torch.nan_to_num(
torch.linalg.norm(pos_diff[:, 11:], dim=-1)
),
"hider_r": self._hider_reward.view(-1, 3),
"seekr_r": self._seeker_reward.view(-1, 3),
}
)
info = {
k: v.mean(-1, keepdims=True)
.repeat(1, self._max_num_agents)
.view(-1, 1)
for k, v in info.items()
}
info.update(
{
"r_t": reward,
}
)

self._last = (obs, reward, done, info)
return self._last
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig):
verbose: bool = True
debug_env: bool = False
dry_run: bool = False
speed_mode: bool = False
eval_keys_to_include_in_name: List[str] = field(default_factory=list)
# For our use case, the CPU side things are mainly memory copies
# and nothing of substantive compute. PyTorch has been making
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ habitat_baselines:
verbose: False
dry_run: False
debug_env: False
speed_mode: False
trainer_name: "ddppo"
torch_gpu_id: 0
tensorboard_dir: "tb"
Expand All @@ -18,7 +19,10 @@ habitat_baselines:
load_resume_state_config: False
test_episode_count: 10
eval_ckpt_path_dir: "data/new_checkpoints"
num_environments: 7800
# For RTX 8000
num_environments: 10998
# For 3090
# num_environments: 6000
writer_type: 'tb'
checkpoint_folder: "data/new_checkpoints"
num_updates: -1
Expand Down
63 changes: 63 additions & 0 deletions scripts/time_runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
import shlex
from argparse import ArgumentParser
from collections import defaultdict
from subprocess import PIPE, Popen

import pandas as pd

SAVE_DIR = "data/"


def get_cmd(num_procs, num_updates, agents_per_env=6):
    """Build the shell command that launches one timed training run.

    The RL code treats every agent as its own environment, so the value
    passed as ``habitat_baselines.num_environments`` is
    ``num_procs * agents_per_env``; the actual number of simulated
    environments is ``num_procs``.

    Args:
        num_procs: Number of actual environments to benchmark.
        num_updates: Value for ``habitat_baselines.num_updates``.
        agents_per_env: Multiplier applied to ``num_procs`` to obtain the
            configured environment count. Defaults to 6, the value that was
            previously hard-coded.

    Returns:
        The full command line as a single string (suitable for
        ``shlex.split``).
    """
    num_environments = num_procs * agents_per_env
    return (
        "python -m rl_utils.launcher --cfg /home/aszot/speed.yaml"
        " --proj-dat nowb,speed"
        " python habitat_baselines/run.py"
        " --exp-config habitat_baselines/config/rearrange/madrona.yaml"
        " --run-type train"
        f" habitat_baselines.num_environments={num_environments}"
        " habitat_baselines.total_num_steps=-1"
        f" habitat_baselines.num_updates={num_updates}"
    )


def get_time(line, time_name):
    """Parse the number that follows ``time_name`` in a tab-separated log line.

    Args:
        line: One line of the training log.
        time_name: Label preceding the value, e.g. ``"fps:"`` or
            ``"env-time:"``.

    Returns:
        The value as a float. A single trailing ``"s"`` (seconds unit) is
        removed before conversion.

    Raises:
        ValueError: If the text after ``time_name`` is not a number.
    """
    value = line.split(time_name)[-1].strip().split("\t")[0].strip()
    # Only strip a unit suffix; an "s" elsewhere in the field must not cause
    # the last character to be dropped.
    if value.endswith("s"):
        value = value[:-1]
    return float(value)


def find_log_file(output):
    """Return the log-file path announced in the launcher's stdout.

    Looks for whitespace-separated tokens containing
    ``habitat_baselines.log_file`` and returns the text after the last ``=``
    of the final such token.

    Args:
        output: Decoded stdout of the launched command.

    Returns:
        The log-file path, or ``None`` if no matching token is present.
    """
    log_file = None
    # Split on any whitespace so a path followed by a newline is not polluted
    # with the text of the next line.
    for token in output.split():
        if "habitat_baselines.log_file" in token:
            log_file = token.split("=")[-1]
    return log_file


def main():
    """Run the benchmark sweep and write the timings to ``speeds.csv``.

    For each scaling factor, launches a training run, locates its log file
    from the launcher output, reads FPS / env-time / pth-time from the tail of
    that log, and finally saves all rows as a CSV under ``SAVE_DIR``.
    """
    parser = ArgumentParser()
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--num-updates", default=10, type=int)
    args = parser.parse_args()

    all_dat = defaultdict(list)

    base_n_envs = 984

    for proc_scaling in [1, 2, 4, 8, 16, 32]:
        nprocs = base_n_envs // proc_scaling
        cmd_parts = shlex.split(get_cmd(nprocs, args.num_updates))
        process = Popen(cmd_parts, stdout=PIPE)
        # communicate() already waits for the process to exit; stderr is not
        # piped, so the second element of the result is always None.
        output, _ = process.communicate()
        output = output.decode("UTF-8").rstrip()
        if process.returncode != 0:
            print(
                f"Warning: run with #Procs={nprocs} exited with code "
                f"{process.returncode}"
            )

        log_file = find_log_file(output)
        if log_file is None:
            raise RuntimeError(
                "Could not find 'habitat_baselines.log_file' in the launcher "
                f"output for #Procs={nprocs}"
            )

        with open(log_file, "r") as f:
            lines = f.readlines()
        if len(lines) < 3:
            raise RuntimeError(
                f"Log file {log_file} has only {len(lines)} line(s); "
                "expected at least 3 to read the timing summary"
            )
        # The FPS figure is read from the third-to-last line and both timings
        # from the second-to-last line of the log.
        fps = get_time(lines[-3], "fps:")
        env = get_time(lines[-2], "env-time:")
        pth = get_time(lines[-2], "pth-time:")

        print(f"#Procs={nprocs}: fps: {fps}, env: {env}, pth: {pth}")
        all_dat["num-processes"].append(nprocs)
        all_dat["FPS"].append(fps)
        all_dat["sim-time-(seconds)"].append(env)
        all_dat["learning-time-(seconds)"].append(pth)
        if args.debug:
            # Debug mode: a single run is enough to validate the pipeline.
            break

    df = pd.DataFrame.from_dict(all_dat)
    print("Final Results\n", df)
    os.makedirs(SAVE_DIR, exist_ok=True)
    df.to_csv(os.path.join(SAVE_DIR, "speeds.csv"))


if __name__ == "__main__":
    main()

0 comments on commit 1b9bd00

Please sign in to comment.