Skip to content

Commit

Permalink
[RLlib] Memory leak finding toolset using tracemalloc + CI memory leak tests. (ray-project#15412)
Browse files Browse the repository at this point in the history
  • Loading branch information
sven1977 authored Apr 12, 2022
1 parent d7ef546 commit a849474
Show file tree
Hide file tree
Showing 35 changed files with 1,083 additions and 204 deletions.
64 changes: 33 additions & 31 deletions .buildkite/pipeline.ml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- DATA_PROCESSING_TESTING=1 INSTALL_HOROVOD=1 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-gpu python/ray/ml/...

- label: ":brain: RLlib: Learning discr. actions TF2-static-graph (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning discr. actions TF2-static-graph"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -15,7 +15,7 @@
--test_tag_filters=learning_tests_discrete,-fake_gpus,-torch_only,-tf2_only,-no_tf_static_graph
--test_arg=--framework=tf
rllib/...
- label: ":brain: RLlib: Learning cont. actions TF2-static-graph (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning cont. actions TF2-static-graph"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -25,7 +25,7 @@
--test_tag_filters=learning_tests_continuous,-fake_gpus,-torch_only,-tf2_only,-no_tf_static_graph
--test_arg=--framework=tf
rllib/...
- label: ":brain: RLlib: Learning discr. actions TF2-eager-tracing (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning discr. actions TF2-eager-tracing"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -35,7 +35,7 @@
--test_tag_filters=learning_tests_discrete,-fake_gpus,-torch_only,-multi_gpu,-no_tf_eager_tracing
--test_arg=--framework=tf2
rllib/...
- label: ":brain: RLlib: Learning cont. actions TF2-eager-tracing (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning cont. actions TF2-eager-tracing"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -46,27 +46,7 @@
--test_arg=--framework=tf2
rllib/...

- label: ":brain: RLlib: Learning discr. actions TF1-static-graph (from rllib/tuned_examples/*.yaml)"
conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 PYTHON=3.7 TF_VERSION=1.14.0 TFP_VERSION=0.7 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=learning_tests_discrete,-fake_gpus,-torch_only,-tf2_only,-no_tf_static_graph,-multi_gpu
--test_arg=--framework=tf
rllib/...
- label: ":brain: RLlib: Learning cont. actions TF1-static-graph (from rllib/tuned_examples/*.yaml)"
conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 PYTHON=3.7 TF_VERSION=1.14.0 TFP_VERSION=0.7 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=learning_tests_continuous,-fake_gpus,-torch_only,-tf2_only,-no_tf_static_graph,-multi_gpu
--test_arg=--framework=tf
rllib/...
- label: ":brain: RLlib: Learning discr. actions PyTorch (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning discr. actions PyTorch"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -76,7 +56,7 @@
--test_tag_filters=learning_tests_discrete,-fake_gpus,-tf_only,-tf2_only,-multi_gpu
--test_arg=--framework=torch
rllib/...
- label: ":brain: RLlib: Learning cont. actions PyTorch (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning cont. actions PyTorch"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -86,7 +66,7 @@
--test_tag_filters=learning_tests_continuous,-fake_gpus,-tf_only,-tf2_only,-multi_gpu
--test_arg=--framework=torch
rllib/...
- label: ":brain: RLlib: Learning tests w/ 2 fake GPUs TF2-static-graph (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning tests w/ 2 fake GPUs TF2-static-graph"
conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -97,7 +77,7 @@
--test_arg=--framework=tf
rllib/...
# TODO: (sven) tf2 (eager) multi-GPU
- label: ":brain: RLlib: Learning tests w/ 2 fake GPUs PyTorch (from rllib/tuned_examples/*.yaml)"
- label: ":brain: RLlib: Learning tests w/ 2 fake GPUs PyTorch"
conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand All @@ -108,6 +88,28 @@
--test_arg=--framework=torch
rllib/...

- label: ":brain: RLlib: Memory leak tests TF2-eager-tracing"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=memory_leak_tests,-flaky
--test_arg=--framework=tf2
rllib/...

- label: ":brain: RLlib: Memory leak tests PyTorch"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=memory_leak_tests,-flaky
--test_arg=--framework=torch
rllib/...

- label: ":brain: RLlib: Quick Agent train.py runs (TODO: obsolete)"
conditions: ["RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
commands:
Expand Down Expand Up @@ -152,7 +154,7 @@
# "learning_tests|quick_train|examples|tests_dir".
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=-learning_tests,-quick_train,-examples,-tests_dir,-trainers_dir,-documentation,-multi_gpu
--test_tag_filters=-learning_tests,-quick_train,-memory_leak_tests,-examples,-tests_dir,-trainers_dir,-documentation,-multi_gpu
--test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...

Expand All @@ -164,14 +166,14 @@
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_A,examples_B,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...

- label: ":brain: RLlib: Examples {Ca..t}"
- label: ":brain: RLlib: Examples {Ca..Ct}"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
- RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_C_AtoT,-multi_gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
- label: ":brain: RLlib: Examples {Cu..z}"
- label: ":brain: RLlib: Examples {Cu..Cz}"
conditions: ["RAY_CI_RLLIB_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
Expand Down
86 changes: 86 additions & 0 deletions rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
# - Examples directory (everything in rllib/examples/...), tagged: "examples" and
# "examples_[A-Z]"

# - Memory leak tests tagged "memory_leak_tests".

# Note: The "examples" and "tests_dir" tags have further sub-tags going by the
# starting letter of the test name (e.g. "examples_A", or "tests_dir_F") for
# split-up purposes in buildkite.
Expand Down Expand Up @@ -582,6 +584,14 @@ py_test(
srcs = ["agents/tests/test_callbacks.py"]
)

# Generic memory leak test living in agents/tests; grouped with the
# "trainers_dir" tests rather than under the "memory_leak_tests" tag.
py_test(
    name = "test_memory_leaks_generic",
    size = "large",
    srcs = ["agents/tests/test_memory_leaks.py"],
    main = "agents/tests/test_memory_leaks.py",
    tags = ["team:ml", "trainers_dir"],
)

py_test(
name = "test_trainer",
tags = ["team:ml", "trainers_dir", "trainers_dir_generic"],
Expand Down Expand Up @@ -852,6 +862,82 @@ py_test(
)


# --------------------------------------------------------------------
# Memory leak tests
#
# Tag: memory_leak_tests
# --------------------------------------------------------------------

# A3C: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/a3c.
py_test(
    name = "test_memory_leak_a3c",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/a3c/memory-leak-test-a3c.yaml"],
    args = ["--yaml-dir=tuned_examples/a3c"],
    tags = ["team:ml", "memory_leak_tests"],
)

# APPO: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/ppo (the APPO yaml lives there).
py_test(
    name = "test_memory_leak_appo",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/ppo/memory-leak-test-appo.yaml"],
    args = ["--yaml-dir=tuned_examples/ppo"],
    tags = ["team:ml", "memory_leak_tests"],
)

# DDPG: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/ddpg.
py_test(
    name = "test_memory_leak_ddpg",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/ddpg/memory-leak-test-ddpg.yaml"],
    args = ["--yaml-dir=tuned_examples/ddpg"],
    tags = ["team:ml", "memory_leak_tests"],
)

# DQN: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/dqn.
py_test(
    name = "test_memory_leak_dqn",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/dqn/memory-leak-test-dqn.yaml"],
    args = ["--yaml-dir=tuned_examples/dqn"],
    tags = ["team:ml", "memory_leak_tests"],
)

# IMPALA: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/impala.
py_test(
    name = "test_memory_leak_impala",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/impala/memory-leak-test-impala.yaml"],
    args = ["--yaml-dir=tuned_examples/impala"],
    tags = ["team:ml", "memory_leak_tests"],
)

# PPO: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/ppo.
py_test(
    name = "test_memory_leak_ppo",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/ppo/memory-leak-test-ppo.yaml"],
    args = ["--yaml-dir=tuned_examples/ppo"],
    tags = ["team:ml", "memory_leak_tests"],
)

# SAC: runs utils/tests/run_memory_leak_tests.py with
# --yaml-dir pointing at tuned_examples/sac.
py_test(
    name = "test_memory_leak_sac",
    size = "large",
    srcs = ["utils/tests/run_memory_leak_tests.py"],
    main = "utils/tests/run_memory_leak_tests.py",
    data = ["tuned_examples/sac/memory-leak-test-sac.yaml"],
    args = ["--yaml-dir=tuned_examples/sac"],
    tags = ["team:ml", "memory_leak_tests"],
)

# --------------------------------------------------------------------
# Agents (quick training test iterations via `rllib train`)
#
Expand Down
14 changes: 7 additions & 7 deletions rllib/agents/ddpg/tests/test_ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,16 @@ def test_ddpg_exploration_and_with_random_prerun(self):
trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v1")
# Setting explore=False should always return the same action.
a_ = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, 1)
check(trainer.get_policy().global_timestep, 1)
for i in range(50):
a = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, i + 2)
check(trainer.get_policy().global_timestep, i + 2)
check(a, a_)
# explore=None (default: explore) should return different actions.
actions = []
for i in range(50):
actions.append(trainer.compute_single_action(obs))
self.assertEqual(trainer.get_policy().global_timestep, i + 52)
check(trainer.get_policy().global_timestep, i + 52)
check(np.std(actions), 0.0, false=True)
trainer.stop()

Expand All @@ -117,25 +117,25 @@ def test_ddpg_exploration_and_with_random_prerun(self):
trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v1")
# ts=0 (get a deterministic action as per explore=False).
deterministic_action = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, 1)
check(trainer.get_policy().global_timestep, 1)
# ts=1-49 (in random window).
random_a = []
for i in range(1, 50):
random_a.append(trainer.compute_single_action(obs, explore=True))
self.assertEqual(trainer.get_policy().global_timestep, i + 1)
check(trainer.get_policy().global_timestep, i + 1)
check(random_a[-1], deterministic_action, false=True)
self.assertTrue(np.std(random_a) > 0.5)

# ts > 50 (a=deterministic_action + scale * N[0,1])
for i in range(50):
a = trainer.compute_single_action(obs, explore=True)
self.assertEqual(trainer.get_policy().global_timestep, i + 51)
check(trainer.get_policy().global_timestep, i + 51)
check(a, deterministic_action, rtol=0.1)

# ts >> 50 (BUT: explore=False -> expect deterministic action).
for i in range(50):
a = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, i + 101)
check(trainer.get_policy().global_timestep, i + 101)
check(a, deterministic_action)
trainer.stop()

Expand Down
14 changes: 7 additions & 7 deletions rllib/agents/ddpg/tests/test_td3.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,16 @@ def test_td3_exploration_and_with_random_prerun(self):
trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v1")
# Setting explore=False should always return the same action.
a_ = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, 1)
check(trainer.get_policy().global_timestep, 1)
for i in range(50):
a = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, i + 2)
check(trainer.get_policy().global_timestep, i + 2)
check(a, a_)
# explore=None (default: explore) should return different actions.
actions = []
for i in range(50):
actions.append(trainer.compute_single_action(obs))
self.assertEqual(trainer.get_policy().global_timestep, i + 52)
check(trainer.get_policy().global_timestep, i + 52)
check(np.std(actions), 0.0, false=True)
trainer.stop()

Expand All @@ -77,25 +77,25 @@ def test_td3_exploration_and_with_random_prerun(self):
trainer = td3.TD3Trainer(config=lcl_config, env="Pendulum-v1")
# ts=0 (get a deterministic action as per explore=False).
deterministic_action = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, 1)
check(trainer.get_policy().global_timestep, 1)
# ts=1-29 (in random window).
random_a = []
for i in range(1, 30):
random_a.append(trainer.compute_single_action(obs, explore=True))
self.assertEqual(trainer.get_policy().global_timestep, i + 1)
check(trainer.get_policy().global_timestep, i + 1)
check(random_a[-1], deterministic_action, false=True)
self.assertTrue(np.std(random_a) > 0.3)

# ts > 30 (a=deterministic_action + scale * N[0,1])
for i in range(50):
a = trainer.compute_single_action(obs, explore=True)
self.assertEqual(trainer.get_policy().global_timestep, i + 31)
check(trainer.get_policy().global_timestep, i + 31)
check(a, deterministic_action, rtol=0.1)

# ts >> 30 (BUT: explore=False -> expect deterministic action).
for i in range(50):
a = trainer.compute_single_action(obs, explore=False)
self.assertEqual(trainer.get_policy().global_timestep, i + 81)
check(trainer.get_policy().global_timestep, i + 81)
check(a, deterministic_action)
trainer.stop()

Expand Down
9 changes: 0 additions & 9 deletions rllib/agents/impala/vtrace_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,15 +360,6 @@ def from_importance_weights(
rhos = tf.math.exp(log_rhos)
if clip_rho_threshold is not None:
clipped_rhos = tf.minimum(clip_rho_threshold, rhos, name="clipped_rhos")

tf1.summary.histogram("clipped_rhos_1000", tf.minimum(1000.0, rhos))
tf1.summary.scalar(
"num_of_clipped_rhos",
tf.reduce_sum(
tf.cast(tf.equal(clipped_rhos, clip_rho_threshold), tf.int32)
),
)
tf1.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
else:
clipped_rhos = rhos

Expand Down
Loading

0 comments on commit a849474

Please sign in to comment.