Commit d15ccfc

Revert "[RLlib] Reparameterize the construction of TrainerRunner and RLTrainers (ray-project#31991)" (ray-project#32130)

Reverts ray-project#31991

This PR seems to have broken CI.

[Screenshot of the CI failure, 2023-01-31 at 1:39:09 PM]

The error (from https://buildkite.com/ray-project/oss-ci-build-branch/builds/2099#01860972-e02e-47c4-8f86-8be28ea18d92/3786-3992):

AttributeError: '_TFStub' object has no attribute 'Tensor'
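
For context: an AttributeError like this is characteristic of referencing `tf.Tensor` at import time on a machine where TensorFlow is not installed, so a guarded import (rl_trainer.py below uses the `tf1, tf, tfv = try_import_tf()` pattern) has handed back a stub object instead of the real module. A minimal sketch of that failure mode, using hypothetical names (`_FakeTFStub`, `guarded_import_tf`) rather than Ray's actual internals:

# Hypothetical sketch of the failure mode; not Ray's actual try_import_tf().
class _FakeTFStub:
    """Stands in for TensorFlow when it cannot be imported."""
    pass


def guarded_import_tf():
    """Return the real tensorflow module if it is installed, else a stub."""
    try:
        import tensorflow as tf
        return tf
    except ImportError:
        return _FakeTFStub()


tf = guarded_import_tf()

# On a machine without TensorFlow (such as the failing CI image), a module-level
# reference to an attribute of the stub fails just like the log above:
try:
    TensorType = tf.Tensor
except AttributeError as err:
    print(err)  # e.g. '_FakeTFStub' object has no attribute 'Tensor'
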
architkulkarni authored Jan 31, 2023

1 parent 10d52f7 commit d15ccfc
Showing 15 changed files with 319 additions and 436 deletions.
9 changes: 8 additions & 1 deletion rllib/BUILD
@@ -1846,10 +1846,17 @@ py_test(
py_test(
name = "test_trainer_runner",
tags = ["team:rllib", "multi_gpu", "exclusive"],
size = "large",
size = "medium",
srcs = ["core/rl_trainer/tests/test_trainer_runner.py"]
)

py_test(
name = "test_trainer_runner_local",
tags = ["team:rllib", "core", "exclusive"],
size = "medium",
srcs = ["core/rl_trainer/tests/test_trainer_runner_local.py"]
)

py_test(
name = "test_trainer_runner_config",
tags = ["team:rllib", "core"],
46 changes: 3 additions & 43 deletions rllib/algorithms/algorithm_config.py
@@ -17,7 +17,6 @@
import ray
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.core.rl_trainer.rl_trainer import RLTrainerHPs
from ray.rllib.core.rl_trainer.trainer_runner_config import (
TrainerRunnerConfig,
ModuleSpec,
@@ -243,9 +242,6 @@ def __init__(self, algo_class=None):
self.num_gpus_per_worker = 0
self._fake_gpus = False
self.num_cpus_for_local_worker = 1
self.num_trainer_workers = 0
self.num_gpus_per_trainer_worker = 0
self.num_cpus_per_trainer_worker = 1
self.custom_resources_per_worker = {}
self.placement_strategy = "PACK"

@@ -322,10 +318,6 @@ def __init__(self, algo_class=None):
self.max_requests_in_flight_per_sampler_worker = 2
self.rl_trainer_class = None
self._enable_rl_trainer_api = False
# experimental: this will contain the hyper-parameters that are passed to the
# RLTrainer, for computing loss, etc. New algorithms have to set this to their
# own default. .training() will modify the fields of this object.
self._rl_trainer_hps = RLTrainerHPs()

# `self.callbacks()`
self.callbacks_class = DefaultCallbacks
@@ -451,10 +443,6 @@ def __init__(self, algo_class=None):
self.soft_horizon = DEPRECATED_VALUE
self.no_done_at_end = DEPRECATED_VALUE

@property
def rl_trainer_hps(self) -> RLTrainerHPs:
return self._rl_trainer_hps

def to_dict(self) -> AlgorithmConfigDict:
"""Converts all settings into a legacy config dict for backward compatibility.
@@ -959,9 +947,6 @@ def resources(
num_cpus_per_worker: Optional[Union[float, int]] = NotProvided,
num_gpus_per_worker: Optional[Union[float, int]] = NotProvided,
num_cpus_for_local_worker: Optional[int] = NotProvided,
num_trainer_workers: Optional[int] = NotProvided,
num_cpus_per_trainer_worker: Optional[Union[float, int]] = NotProvided,
num_gpus_per_trainer_worker: Optional[Union[float, int]] = NotProvided,
custom_resources_per_worker: Optional[dict] = NotProvided,
placement_strategy: Optional[str] = NotProvided,
) -> "AlgorithmConfig":
@@ -981,20 +966,6 @@ def resources(
fractional. This is usually needed only if your env itself requires a
GPU (i.e., it is a GPU-intensive video game), or model inference is
unusually expensive.
num_trainer_workers: Number of workers used for training. A value of 0
means training will take place on a local worker on head node CPUs or 1
GPU (determined by `num_gpus_per_trainer_worker`). For multi-gpu
training, set number of workers greater than 1 and set
`num_gpus_per_trainer_worker` accordingly (e.g. 4 GPUs total, and model
needs 2 GPUs: `num_trainer_workers = 2` and
`num_gpus_per_trainer_worker = 2`)
num_cpus_per_trainer_worker: Number of CPUs allocated per trainer worker.
Only necessary for custom processing pipeline inside each RLTrainer
requiring multiple CPU cores. Ignored if `num_trainer_workers = 0`.
num_gpus_per_trainer_worker: Number of GPUs allocated per worker. If
`num_trainer_workers = 0`, any value greater than 0 will run the
training on a single GPU on the head node, while a value of 0 will run
the training on head node CPU cores.
custom_resources_per_worker: Any custom Ray resources to allocate per
worker.
num_cpus_for_local_worker: Number of CPUs to allocate for the algorithm.
@@ -1035,13 +1006,6 @@ def resources(
if placement_strategy is not NotProvided:
self.placement_strategy = placement_strategy

if num_trainer_workers is not NotProvided:
self.num_trainer_workers = num_trainer_workers
if num_cpus_per_trainer_worker is not NotProvided:
self.num_cpus_per_trainer_worker = num_cpus_per_trainer_worker
if num_gpus_per_trainer_worker is not NotProvided:
self.num_gpus_per_trainer_worker = num_gpus_per_trainer_worker

return self

def framework(
@@ -2681,16 +2645,12 @@ def get_trainer_runner_config(
.module(module_spec)
.trainer(
trainer_class=self.rl_trainer_class,
eager_tracing=self.eager_tracing,
# TODO (Kourosh): optimizer config can now be more complicated.
optimizer_config={"lr": self.lr},
rl_trainer_hps=self.rl_trainer_hps,
)
.resources(
num_trainer_workers=self.num_trainer_workers,
num_cpus_per_trainer_worker=self.num_cpus_per_trainer_worker,
num_gpus_per_trainer_worker=self.num_gpus_per_trainer_worker,
)
.framework(eager_tracing=self.eager_tracing)
.resources(num_gpus=self.num_gpus, fake_gpus=self._fake_gpus)
.algorithm(algorithm_config=self)
)

return config
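
For reference, the docstring removed above described multi-GPU trainer setup with these arguments. A usage sketch against the pre-revert API from ray-project#31991 (after this revert these `resources()` arguments no longer exist; `PPOConfig` is just an illustrative `AlgorithmConfig` subclass):

# Sketch only: valid for the reverted ray-project#31991 API, not after this revert.
from ray.rllib.algorithms.ppo import PPOConfig  # illustrative AlgorithmConfig subclass

# The removed docstring's example: 4 GPUs total and a model that needs 2 GPUs,
# so run 2 trainer workers with 2 GPUs each.
config = PPOConfig().resources(
    num_trainer_workers=2,
    num_gpus_per_trainer_worker=2,
    num_cpus_per_trainer_worker=1,
)

After the revert, GPU placement for the trainer runner goes back through `num_gpus` and `_fake_gpus`, as restored in `get_trainer_runner_config()` above.
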
93 changes: 10 additions & 83 deletions rllib/core/rl_trainer/rl_trainer.py
@@ -1,6 +1,5 @@
import abc

from dataclasses import dataclass, field
import logging
import numpy as np
from typing import (
@@ -15,6 +14,7 @@
Tuple,
Type,
Union,
TYPE_CHECKING,
)

from ray.rllib.utils.framework import try_import_tf, try_import_torch
@@ -23,7 +23,6 @@
ModuleID,
SingleAgentRLModuleSpec,
)

from ray.rllib.core.rl_module.marl_module import (
MultiAgentRLModule,
MultiAgentRLModuleSpec,
@@ -32,8 +31,10 @@
from ray.rllib.utils.nested_dict import NestedDict
from ray.rllib.utils.numpy import convert_to_numpy
from ray.rllib.utils.typing import TensorType
from ray.rllib.core.rl_trainer.scaling_config import TrainerScalingConfig

if TYPE_CHECKING:
from ray.air.config import ScalingConfig
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig

torch, _ = try_import_torch()
tf1, tf, tfv = try_import_tf()
@@ -47,32 +48,6 @@
ParamDictType = Dict[ParamRef, ParamType]


@dataclass
class FrameworkHPs:
"""The framework specific hyper-parameters.
Args:
eager_tracing: Whether to trace the model in eager mode. This enables tf
tracing mode by wrapping the loss function computation in a tf.function.
This is useful for speeding up the training loop. However, it is not
compatible with all tf operations. For example, tf.print is not supported
in tf.function.
"""

eager_tracing: bool = False


@dataclass
class RLTrainerHPs:
"""The hyper-parameters for RLTrainer.
When creating a new RLTrainer, the new hyper-parameters have to be defined by
subclassing this class and adding the new hyper-parameters as fields.
"""

pass


class RLTrainer:
"""Base class for RLlib algorithm trainers.
@@ -143,9 +118,9 @@ def __init__(
] = None,
module: Optional[RLModule] = None,
optimizer_config: Mapping[str, Any] = None,
trainer_scaling_config: TrainerScalingConfig = TrainerScalingConfig(),
trainer_hyperparameters: Optional[RLTrainerHPs] = RLTrainerHPs(),
framework_hyperparameters: Optional[FrameworkHPs] = FrameworkHPs(),
distributed: bool = False,
scaling_config: Optional["ScalingConfig"] = None,
algorithm_config: Optional["AlgorithmConfig"] = None,
):
# TODO (Kourosh): Having the entire algorithm_config inside trainer may not be
# the best idea in the world, but it's easy to implement and user will
@@ -165,10 +140,9 @@ def __init__(
self.module_spec = module_spec
self.module_obj = module
self.optimizer_config = optimizer_config
self.config = trainer_hyperparameters

# pick the configs that we need for the trainer from scaling config
self._distributed = trainer_scaling_config.num_workers > 1
self.distributed = distributed
self.scaling_config = scaling_config
self.config = algorithm_config

# These are the attributes that are set during build
self._module: MultiAgentRLModule = None
@@ -177,10 +151,6 @@ def __init__(
self._param_to_optim: Dict[ParamRef, Optimizer] = {}
self._params: ParamDictType = {}

@property
def distributed(self) -> bool:
return self._distributed

@property
def module(self) -> MultiAgentRLModule:
return self._module
@@ -641,46 +611,3 @@ def __check_if_build_called(self):
"RLTrainer.build() must be called after constructing a "
"RLTrainer and before calling any methods on it."
)


@dataclass
class RLTrainerSpec:
"""The spec for construcitng RLTrainer actors.
Args:
rl_trainer_class: The RLTrainer class to use.
module_spec: The underlying (MA)RLModule spec to completely define the module.
module: Alternatively the RLModule instance can be passed in directly. This
only works if the RLTrainer is not an actor.
backend_config: The backend config for properly distributing the RLModule.
optimizer_config: The optimizer setting to apply during training.
trainer_hyperparameters: The extra config for the loss/additional update. This
should be a subclass of RLTrainerHPs. This is useful for passing in
algorithm configs that contain the hyper-parameters for loss computation,
changes in training behavior, etc., e.g. lr, entropy_coeff.
"""

rl_trainer_class: Type["RLTrainer"]
module_spec: Union["SingleAgentRLModuleSpec", "MultiAgentRLModuleSpec"] = None
module: Optional["RLModule"] = None
trainer_scaling_config: TrainerScalingConfig = field(
default_factory=TrainerScalingConfig
)
optimizer_config: Dict[str, Any] = field(default_factory=dict)
trainer_hyperparameters: RLTrainerHPs = field(default_factory=RLTrainerHPs)
framework_hyperparameters: FrameworkHPs = field(default_factory=FrameworkHPs)

def get_params_dict(self) -> Dict[str, Any]:
"""Returns the parameters than be passed to the RLTrainer constructor."""
return {
"module": self.module,
"module_spec": self.module_spec,
"trainer_scaling_config": self.trainer_scaling_config,
"optimizer_config": self.optimizer_config,
"trainer_hyperparameters": self.trainer_hyperparameters,
"framework_hyperparameters": self.framework_hyperparameters,
}

def build(self) -> "RLTrainer":
"""Builds the RLTrainer instance."""
return self.rl_trainer_class(**self.get_params_dict())
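
To make the removed `RLTrainerSpec` concrete, here is a construction sketch against the pre-revert API (everything below is gone after this revert; the `DiscreteBCTFModule` import path, the gymnasium import, and the observation/action space argument names are assumptions based on the test file further down):

# Sketch only: uses the RLTrainerSpec / TrainerScalingConfig API removed by this revert.
import gymnasium as gym

from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.core.rl_trainer.rl_trainer import FrameworkHPs, RLTrainerSpec
from ray.rllib.core.rl_trainer.scaling_config import TrainerScalingConfig
from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule  # assumed path
from ray.rllib.core.testing.tf.bc_rl_trainer import BCTfRLTrainer

env = gym.make("CartPole-v1")

spec = RLTrainerSpec(
    rl_trainer_class=BCTfRLTrainer,
    module_spec=SingleAgentRLModuleSpec(
        module_class=DiscreteBCTFModule,
        observation_space=env.observation_space,  # assumed argument names
        action_space=env.action_space,
        model_config={"hidden_dim": 32},
    ),
    trainer_scaling_config=TrainerScalingConfig(),  # default scaling (fields not shown in this diff)
    optimizer_config={"lr": 1e-3},
    framework_hyperparameters=FrameworkHPs(eager_tracing=True),
)

# build() simply forwards get_params_dict() to the rl_trainer_class constructor.
trainer = spec.build()
trainer.build()
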
21 changes: 0 additions & 21 deletions rllib/core/rl_trainer/scaling_config.py

This file was deleted.

10 changes: 7 additions & 3 deletions rllib/core/rl_trainer/tests/test_rl_trainer.py
@@ -11,12 +11,16 @@
from ray.rllib.core.testing.tf.bc_rl_trainer import BCTfRLTrainer
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.utils.test_utils import check, get_cartpole_dataset_reader
from ray.rllib.core.rl_trainer.scaling_config import TrainerScalingConfig


def get_trainer() -> RLTrainer:
def get_trainer(distributed=False) -> RLTrainer:
env = gym.make("CartPole-v1")

# TODO: Another way to make RLTrainer would be to construct the module first
# and then apply the trainer to it. We should also allow that. In fact, if we
# figure out the serialization of RLModules, we can simply pass the module to
# the trainer and it will internally serialize and deserialize the module for
# distributed construction.
trainer = BCTfRLTrainer(
module_spec=SingleAgentRLModuleSpec(
module_class=DiscreteBCTFModule,
@@ -25,7 +29,7 @@ def get_trainer() -> RLTrainer:
model_config={"hidden_dim": 32},
),
optimizer_config={"lr": 1e-3},
trainer_scaling_config=TrainerScalingConfig(),
distributed=distributed,
)

trainer.build()
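
As a rough usage sketch for the restored helper (assumed to run inside rllib/core/rl_trainer/tests/test_rl_trainer.py, which defines get_trainer and imports get_cartpole_dataset_reader above; the reader batch size and the shape of the returned results are assumptions):

# Sketch: drive the restored get_trainer() helper for a single update step.
trainer = get_trainer(distributed=False)
reader = get_cartpole_dataset_reader(batch_size=512)

batch = reader.next()
results = trainer.update(batch.as_multi_agent())  # one update on a MultiAgentBatch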