Skip to content

Commit

Permalink
[rllib] Enable object store memory limit by default (ray-project#5534)
Browse files: browse the repository at this point in the history
  • Loading branch information
ericl authored Aug 26, 2019
1 parent 7d28bbb commit 97ccd75
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 19 deletions.
6 changes: 0 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,6 @@ script:
# `cluster_tests.py` runs on Jenkins, not Travis.
- if [ $RAY_CI_TUNE_AFFECTED == "1" ]; then python -m pytest --durations=10 --timeout=300 --ignore=python/ray/tune/tests/test_cluster.py --ignore=python/ray/tune/tests/test_tune_restore.py --ignore=python/ray/tune/tests/test_actor_reuse.py python/ray/tune/tests; fi

# ray rllib tests
- if [ $RAY_CI_RLLIB_AFFECTED == "1" ]; then ./ci/suppress_output python python/ray/rllib/tests/test_catalog.py; fi
- if [ $RAY_CI_RLLIB_AFFECTED == "1" ]; then ./ci/suppress_output python python/ray/rllib/tests/test_filters.py; fi
- if [ $RAY_CI_RLLIB_AFFECTED == "1" ]; then ./ci/suppress_output python python/ray/rllib/tests/test_optimizers.py; fi
- if [ $RAY_CI_RLLIB_AFFECTED == "1" ]; then ./ci/suppress_output python python/ray/rllib/tests/test_evaluators.py; fi

# ray tests
# Python 3.5+ only. Otherwise we will get a `SyntaxError` regardless of how we set the tester.
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
Expand Down
12 changes: 12 additions & 0 deletions ci/jenkins_tests/run_rllib_tests.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/rllib/tests/test_catalog.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/rllib/tests/test_optimizers.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/rllib/tests/test_filters.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/rllib/tests/test_evaluators.py

docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output python /ray/rllib/tests/test_eager_support.py

Expand Down
2 changes: 1 addition & 1 deletion python/ray/resource_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def resolve(self, is_head):
if memory is None:
memory = (avail_memory - object_store_memory - (redis_max_memory
if is_head else 0))
if memory < 500e6 and memory < 0.05 * system_memory:
if memory < 100e6 and memory < 0.05 * system_memory:
raise ValueError(
"After taking into account object store and redis memory "
"usage, the amount of memory on this node available for "
Expand Down
4 changes: 2 additions & 2 deletions rllib/agents/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,14 @@
# Object store memory to reserve for the trainer process. It only needs
# to be large enough to fit a few copies of the model weights. This limit
# is enabled by default since models are typically quite small.
"object_store_memory": 0,
"object_store_memory": 200 * 1024 * 1024,
# Heap memory to reserve for each worker. Should generally be small unless
# your environment is very heavyweight.
"memory_per_worker": 0,
# Object store memory to reserve for each worker. This only needs to be
# large enough to fit a few sample batches at a time. This is enabled
# by default since it almost never needs to be larger than ~200MB.
"object_store_memory_per_worker": 0,
"object_store_memory_per_worker": 200 * 1024 * 1024,

# === Execution ===
# Number of environments to evaluate vectorwise per worker.
Expand Down
10 changes: 5 additions & 5 deletions rllib/tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def testGymPreprocessors(self):
self.assertEqual(type(p2), OneHotPreprocessor)

def testTuplePreprocessor(self):
ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)

class TupleEnv(object):
def __init__(self):
Expand All @@ -78,7 +78,7 @@ def __init__(self):
[float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])

def testCustomPreprocessor(self):
ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)
ModelCatalog.register_custom_preprocessor("foo", CustomPreprocessor)
ModelCatalog.register_custom_preprocessor("bar", CustomPreprocessor2)
env = gym.make("CartPole-v0")
Expand All @@ -90,7 +90,7 @@ def testCustomPreprocessor(self):
self.assertEqual(type(p3), NoPreprocessor)

def testDefaultModels(self):
ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)

with tf.variable_scope("test1"):
p1 = ModelCatalog.get_model({
Expand All @@ -106,7 +106,7 @@ def testDefaultModels(self):
self.assertEqual(type(p2), VisionNetwork)

def testCustomModel(self):
ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)
ModelCatalog.register_custom_model("foo", CustomModel)
p1 = ModelCatalog.get_model({
"obs": tf.constant([1, 2, 3])
Expand All @@ -118,7 +118,7 @@ def testCustomActionDistribution(self):
class Model():
pass

ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)
# registration
ModelCatalog.register_custom_action_dist("test",
CustomActionDistribution)
Expand Down
2 changes: 1 addition & 1 deletion rllib/tests/test_evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def env_creator(env_config):
agent_classes = [DQNTrainer, A3CTrainer]

for agent_cls in agent_classes:
ray.init()
ray.init(object_store_memory=1000 * 1024 * 1024)
register_env("CartPoleWrapped-v0", env_creator)
agent = agent_cls(
env="CartPoleWrapped-v0",
Expand Down
2 changes: 1 addition & 1 deletion rllib/tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def testBasic(self):

class FilterManagerTest(unittest.TestCase):
def setUp(self):
ray.init(num_cpus=1)
ray.init(num_cpus=1, object_store_memory=1000 * 1024 * 1024)

def tearDown(self):
ray.shutdown()
Expand Down
6 changes: 3 additions & 3 deletions rllib/tests/test_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def tearDown(self):
ray.shutdown()

def testBasic(self):
ray.init(num_cpus=4)
ray.init(num_cpus=4, object_store_memory=1000 * 1024 * 1024)
local = _MockWorker()
remotes = ray.remote(_MockWorker)
remote_workers = [remotes.remote() for i in range(5)]
Expand All @@ -41,7 +41,7 @@ def tearDown(self):
ray.shutdown()

def testPPOSampleWaste(self):
ray.init(num_cpus=4)
ray.init(num_cpus=4, object_store_memory=1000 * 1024 * 1024)

# Check we at least collect the initial wave of samples
ppo = PPOTrainer(
Expand Down Expand Up @@ -101,7 +101,7 @@ def tearDownClass(cls):

@classmethod
def setUpClass(cls):
ray.init(num_cpus=8)
ray.init(num_cpus=8, object_store_memory=1000 * 1024 * 1024)

def testSimple(self):
local, remotes = self._make_envs()
Expand Down

0 comments on commit 97ccd75

Please sign in to comment.