diff --git a/README.md b/README.md index 3aa72f4..a7be678 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,13 @@ This is not an official Google product. There are several options for getting started with Dopamine, depending on your use case. +### Prerequisites + +* Atari: To run Atari environments with Dopamine, install the Atari ROMs +following the instructions from [atari-py](https://github.com/openai/atari-py#roms). +* MuJoCo: To use Dopamine for continuous control, install MuJoCo and get a +license. See instructions [here](https://github.com/openai/mujoco-py#install-mujoco). + ### Installing from Source @@ -85,7 +92,7 @@ export PYTHONPATH=$PYTHONPATH:$PWD python -m tests.dopamine.atari_init_test ``` -### Next Steps +## Next Steps View the [docs][docs] for more information on training agents. @@ -94,7 +101,7 @@ We supply [baselines][baselines] for each Dopamine agent. We also provide a set of [Colaboratory notebooks](https://github.com/google/dopamine/tree/master/dopamine/colab) which demonstrate how to use Dopamine. -### References +## References [Bellemare et al., *The Arcade Learning Environment: An evaluation platform for general agents*. Journal of Artificial Intelligence Research, 2013.][ale] @@ -115,7 +122,7 @@ Conference on Learning Representations, 2016.][prioritized_replay] [Haarnoja et al., *Soft Actor-Critic Algorithms and Applications*, arXiv preprint arXiv:1812.05905, 2018.][sac] -### Giving credit +## Giving credit If you use Dopamine in your work, we ask that you cite our [white paper][dopamine_paper]. Here is an example BibTeX entry: diff --git a/dopamine/continuous_domains/train.py b/dopamine/continuous_domains/train.py index c0ad412..dd2bce6 100644 --- a/dopamine/continuous_domains/train.py +++ b/dopamine/continuous_domains/train.py @@ -41,6 +41,9 @@ def main(unused_argv): unused_argv: Arguments (unused). 
""" logging.set_verbosity(logging.INFO) + base_dir = FLAGS.base_dir + gin_files = FLAGS.gin_files + gin_bindings = FLAGS.gin_bindings run_experiment.load_gin_configs(gin_files, gin_bindings) runner = run_experiment.create_continuous_runner(base_dir) diff --git a/dopamine/discrete_domains/gym_lib.py b/dopamine/discrete_domains/gym_lib.py index 8770bc6..4ff8b9c 100644 --- a/dopamine/discrete_domains/gym_lib.py +++ b/dopamine/discrete_domains/gym_lib.py @@ -58,6 +58,9 @@ gin.constant('gym_lib.MOUNTAINCAR_STACK_SIZE', 1) +MUJOCO_GAMES = ('Ant', 'HalfCheetah', 'Hopper', 'Humanoid', 'Walker2d') + + @gin.configurable def create_gym_environment(environment_name=None, version='v0'): """Wraps a Gym environment with some basic preprocessing. @@ -70,6 +73,8 @@ def create_gym_environment(environment_name=None, version='v0'): A Gym environment with some standard preprocessing. """ assert environment_name is not None + + full_game_name = '{}-{}'.format(environment_name, version) env = gym.make(full_game_name) # Strip out the TimeLimit wrapper from Gym, which caps us at 200 steps. 
diff --git a/dopamine/jax/agents/sac/configs/sac.gin b/dopamine/jax/agents/sac/configs/sac.gin index e1c892e..04e2bf4 100644 --- a/dopamine/jax/agents/sac/configs/sac.gin +++ b/dopamine/jax/agents/sac/configs/sac.gin @@ -28,12 +28,10 @@ create_optimizer.beta1 = 0.9 create_optimizer.beta2 = 0.999 create_optimizer.eps = 1.0e-8 -create_gym_environment.environment_name = 'DM-HalfCheetah' +create_gym_environment.environment_name = 'HalfCheetah' create_gym_environment.version = 'v2' create_continuous_runner.schedule = 'continuous_train_and_eval' create_continuous_agent.agent_name = 'sac' -deepmind_control_lib.create_deepmind_control_environment.use_image_observations = False -DeepmindControlPreprocessing.action_repeat = 1 ContinuousTrainRunner.create_environment_fn = @gym_lib.create_gym_environment ContinuousRunner.num_iterations = 3200 ContinuousRunner.training_steps = 1000 @@ -43,3 +41,4 @@ ContinuousRunner.clip_rewards = False circular_replay_buffer.OutOfGraphReplayBuffer.replay_capacity = 1000000 circular_replay_buffer.OutOfGraphReplayBuffer.batch_size = 256 + diff --git a/requirements.txt b/requirements.txt index 70b12c2..cee5649 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,7 @@ kiwisolver>=1.2.0 Markdown>=3.2.2 matplotlib>=3.3.0 msgpack>=1.0.0 +mujoco-py<2.1,>=2.0 numpy>=1.18.5 oauthlib>=3.1.0 opencv-python>=4.3.0.36 @@ -48,6 +49,7 @@ tensorboard tensorboard-plugin-wit tensorflow tensorflow-estimator +tensorflow-probability>=0.13.0 termcolor>=1.1.0 tf-slim>=1.1.0 urllib3>=1.25.10