Rename scaler to observation_scaler
takuseno committed Oct 14, 2022
1 parent d3239cc commit 3aa1687
Showing 76 changed files with 506 additions and 462 deletions.
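As a quick orientation before the per-file diff, here is a minimal sketch of what the rename means for calling code. The class, keyword names, and string options are taken from the diffs below; the concrete values are illustrative only, not part of the commit.

# Minimal sketch of the keyword rename (illustrative values).
from d3rlpy.algos import CQL

# Before this commit, the observation preprocessor was passed as `scaler`:
# cql = CQL(scaler="standard", action_scaler="min_max", reward_scaler="standard")

# After this commit, the keyword is `observation_scaler`; the other scalers keep their names:
cql = CQL(
    observation_scaler="standard",  # 'pixel', 'min_max' or 'standard' per the docstrings below
    action_scaler="min_max",
    reward_scaler="standard",
)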
13 changes: 7 additions & 6 deletions d3rlpy/algos/awac.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -69,8 +69,9 @@ class AWAC(AlgoBase):
update_actor_interval (int): interval to update policy function.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -113,7 +114,7 @@ def __init__(
n_critics: int = 2,
update_actor_interval: int = 1,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[AWACImpl] = None,
@@ -122,7 +123,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -159,7 +160,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_action_samples=self._n_action_samples,
n_critics=self._n_critics,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
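A small usage sketch for the updated AWAC signature above; only the keyword names come from the diff, and the argument values are illustrative.

from d3rlpy.algos import AWAC

awac = AWAC(
    observation_scaler="standard",  # formerly passed as `scaler=`
    action_scaler="min_max",
    reward_scaler="standard",
    use_gpu=False,
)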
28 changes: 15 additions & 13 deletions d3rlpy/algos/bc.py
@@ -5,7 +5,7 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
- ScalerArg,
+ ObservationScalerArg,
UseGPUArg,
check_encoder,
check_use_gpu,
@@ -34,15 +34,15 @@ def __init__(
encoder_factory: EncoderArg = "default",
batch_size: int = 100,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
impl: Optional[BCBaseImpl] = None,
**kwargs: Any
):
super().__init__(
batch_size=batch_size,
gamma=1.0,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
kwargs=kwargs,
)
@@ -96,8 +96,9 @@ class BC(_BCBase):
``['deterministic', 'stochastic']``.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action scaler. The available options are ``['min_max']``.
impl (d3rlpy.algos.torch.bc_impl.BCImpl):
@@ -117,7 +118,7 @@ def __init__(
batch_size: int = 100,
policy_type: str = "deterministic",
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
impl: Optional[BCBaseImpl] = None,
**kwargs: Any
@@ -128,7 +129,7 @@
encoder_factory=encoder_factory,
batch_size=batch_size,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
impl=impl,
**kwargs,
@@ -144,7 +145,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
encoder_factory=self._encoder_factory,
policy_type=self._policy_type,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
)
self._impl.build()
@@ -179,8 +180,9 @@ class DiscreteBC(_BCBase):
beta (float): reguralization factor.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
impl (d3rlpy.algos.torch.bc_impl.DiscreteBCImpl):
implemenation of the algorithm.
@@ -198,7 +200,7 @@ def __init__(
batch_size: int = 100,
beta: float = 0.5,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
impl: Optional[DiscreteBCImpl] = None,
**kwargs: Any
):
@@ -208,7 +210,7 @@
encoder_factory=encoder_factory,
batch_size=batch_size,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
impl=impl,
**kwargs,
)
@@ -223,7 +225,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
encoder_factory=self._encoder_factory,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
)
self._impl.build()

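The two behavior-cloning classes accept different scaler sets, as the signatures above show: BC takes both observation_scaler and action_scaler, while DiscreteBC only takes observation_scaler. A brief sketch with illustrative values:

from d3rlpy.algos import BC, DiscreteBC

# Continuous-action BC: observation and action preprocessing.
bc = BC(observation_scaler="pixel", action_scaler="min_max")

# Discrete-action BC: there is no action_scaler argument in the signature above.
discrete_bc = DiscreteBC(observation_scaler="pixel")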
24 changes: 13 additions & 11 deletions d3rlpy/algos/bcq.py
@@ -5,9 +5,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -131,8 +131,9 @@ class BCQ(AlgoBase):
beta (float): KL reguralization term for Conditional VAE.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -187,7 +188,7 @@ def __init__(
rl_start_step: int = 0,
beta: float = 0.5,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[BCQImpl] = None,
@@ -196,7 +197,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -244,7 +245,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
action_flexibility=self._action_flexibility,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
@@ -334,8 +335,9 @@ class DiscreteBCQ(AlgoBase):
target_update_interval (int): interval to update the target network.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
reward preprocessor. The available options are
``['clip', 'min_max', 'standard']``.
@@ -369,15 +371,15 @@ def __init__(
beta: float = 0.5,
target_update_interval: int = 8000,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[DiscreteBCQImpl] = None,
**kwargs: Any
):
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=None,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -406,7 +408,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
action_flexibility=self._action_flexibility,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
reward_scaler=self._reward_scaler,
)
self._impl.build()
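The same split applies to BCQ: the continuous variant forwards all three scalers, while DiscreteBCQ has no action_scaler parameter and passes action_scaler=None to the base class (see the super().__init__ call above). A sketch with illustrative values:

from d3rlpy.algos import BCQ, DiscreteBCQ

bcq = BCQ(
    observation_scaler="min_max",
    action_scaler="min_max",
    reward_scaler="clip",
)

discrete_bcq = DiscreteBCQ(
    observation_scaler="min_max",
    reward_scaler="clip",
)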
13 changes: 7 additions & 6 deletions d3rlpy/algos/bear.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -112,8 +112,9 @@ class BEAR(AlgoBase):
function.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device iD or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The avaiable options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The avaiable options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The avaiable options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -186,7 +187,7 @@ def __init__(
vae_kl_weight: float = 0.5,
warmup_steps: int = 40000,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[BEARImpl] = None,
@@ -195,7 +196,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -262,7 +263,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
mmd_sigma=self._mmd_sigma,
vae_kl_weight=self._vae_kl_weight,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
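BEAR follows the same rename; per the docstring above, observation_scaler accepts either a d3rlpy.preprocessing.ObservationScaler instance or one of the listed strings. A string-based sketch with illustrative values:

from d3rlpy.algos import BEAR

bear = BEAR(
    observation_scaler="standard",  # or a d3rlpy.preprocessing.ObservationScaler instance
    action_scaler="min_max",
    reward_scaler="standard",
)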
24 changes: 13 additions & 11 deletions d3rlpy/algos/cql.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -98,8 +98,9 @@ class CQL(AlgoBase):
soft_q_backup (bool): flag to use SAC-style backup.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -156,7 +157,7 @@ def __init__(
n_action_samples: int = 10,
soft_q_backup: bool = False,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[CQLImpl] = None,
@@ -165,7 +166,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -217,7 +218,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_action_samples=self._n_action_samples,
soft_q_backup=self._soft_q_backup,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
@@ -291,8 +292,9 @@ class DiscreteCQL(DoubleDQN):
alpha (float): the :math:`\alpha` value above.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
reward preprocessor. The available options are
``['clip', 'min_max', 'standard']``.
@@ -317,7 +319,7 @@ def __init__(
target_update_interval: int = 8000,
alpha: float = 1.0,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[DiscreteCQLImpl] = None,
**kwargs: Any,
@@ -332,7 +334,7 @@
n_critics=n_critics,
target_update_interval=target_update_interval,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
reward_scaler=reward_scaler,
impl=impl,
**kwargs,
@@ -351,7 +353,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_critics=self._n_critics,
alpha=self._alpha,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
reward_scaler=self._reward_scaler,
)
self._impl.build()
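DiscreteCQL inherits from DoubleDQN and simply forwards observation_scaler through super().__init__, as the diff above shows; downstream, _create_impl now reads self._observation_scaler instead of self._scaler. A short sketch with illustrative values:

from d3rlpy.algos import DiscreteCQL

discrete_cql = DiscreteCQL(
    observation_scaler="pixel",
    reward_scaler="standard",
)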
