Rename scaler to observation_scaler
takuseno committed Oct 14, 2022
1 parent d3239cc commit 3aa1687
Showing 76 changed files with 506 additions and 462 deletions.
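As a quick orientation before the per-file diff, here is a minimal sketch of what the rename means for calling code. The class, keyword names, and string options are taken from the diffs below; the concrete values are illustrative only, not part of the commit.

# Minimal sketch of the keyword rename (illustrative values).
from d3rlpy.algos import CQL

# Before this commit, the observation preprocessor was passed as `scaler`:
# cql = CQL(scaler="standard", action_scaler="min_max", reward_scaler="standard")

# After this commit, the keyword is `observation_scaler`; the other scalers keep their names:
cql = CQL(
    observation_scaler="standard",  # 'pixel', 'min_max' or 'standard' per the docstrings below
    action_scaler="min_max",
    reward_scaler="standard",
)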
13 changes: 7 additions & 6 deletions d3rlpy/algos/awac.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -69,8 +69,9 @@ class AWAC(AlgoBase):
update_actor_interval (int): interval to update policy function.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -113,7 +114,7 @@ def __init__(
n_critics: int = 2,
update_actor_interval: int = 1,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[AWACImpl] = None,
@@ -122,7 +123,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -159,7 +160,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_action_samples=self._n_action_samples,
n_critics=self._n_critics,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
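A small usage sketch for the updated AWAC signature above; only the keyword names come from the diff, and the argument values are illustrative.

from d3rlpy.algos import AWAC

awac = AWAC(
    observation_scaler="standard",  # formerly passed as `scaler=`
    action_scaler="min_max",
    reward_scaler="standard",
    use_gpu=False,
)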
28 changes: 15 additions & 13 deletions d3rlpy/algos/bc.py
@@ -5,7 +5,7 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
- ScalerArg,
+ ObservationScalerArg,
UseGPUArg,
check_encoder,
check_use_gpu,
@@ -34,15 +34,15 @@ def __init__(
encoder_factory: EncoderArg = "default",
batch_size: int = 100,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
impl: Optional[BCBaseImpl] = None,
**kwargs: Any
):
super().__init__(
batch_size=batch_size,
gamma=1.0,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
kwargs=kwargs,
)
@@ -96,8 +96,9 @@ class BC(_BCBase):
``['deterministic', 'stochastic']``.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action scaler. The available options are ``['min_max']``.
impl (d3rlpy.algos.torch.bc_impl.BCImpl):
@@ -117,7 +118,7 @@ def __init__(
batch_size: int = 100,
policy_type: str = "deterministic",
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
impl: Optional[BCBaseImpl] = None,
**kwargs: Any
@@ -128,7 +129,7 @@
encoder_factory=encoder_factory,
batch_size=batch_size,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
impl=impl,
**kwargs,
@@ -144,7 +145,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
encoder_factory=self._encoder_factory,
policy_type=self._policy_type,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
)
self._impl.build()
@@ -179,8 +180,9 @@ class DiscreteBC(_BCBase):
beta (float): reguralization factor.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
impl (d3rlpy.algos.torch.bc_impl.DiscreteBCImpl):
implemenation of the algorithm.
@@ -198,7 +200,7 @@ def __init__(
batch_size: int = 100,
beta: float = 0.5,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
impl: Optional[DiscreteBCImpl] = None,
**kwargs: Any
):
@@ -208,7 +210,7 @@
encoder_factory=encoder_factory,
batch_size=batch_size,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
impl=impl,
**kwargs,
)
@@ -223,7 +225,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
encoder_factory=self._encoder_factory,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
)
self._impl.build()

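The two behavior-cloning classes accept different scaler sets, as the signatures above show: BC takes both observation_scaler and action_scaler, while DiscreteBC only takes observation_scaler. A brief sketch with illustrative values:

from d3rlpy.algos import BC, DiscreteBC

# Continuous-action BC: observation and action preprocessing.
bc = BC(observation_scaler="pixel", action_scaler="min_max")

# Discrete-action BC: there is no action_scaler argument in the signature above.
discrete_bc = DiscreteBC(observation_scaler="pixel")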
24 changes: 13 additions & 11 deletions d3rlpy/algos/bcq.py
@@ -5,9 +5,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -131,8 +131,9 @@ class BCQ(AlgoBase):
beta (float): KL reguralization term for Conditional VAE.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -187,7 +188,7 @@ def __init__(
rl_start_step: int = 0,
beta: float = 0.5,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[BCQImpl] = None,
@@ -196,7 +197,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -244,7 +245,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
action_flexibility=self._action_flexibility,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
@@ -334,8 +335,9 @@ class DiscreteBCQ(AlgoBase):
target_update_interval (int): interval to update the target network.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
reward preprocessor. The available options are
``['clip', 'min_max', 'standard']``.
@@ -369,15 +371,15 @@ def __init__(
beta: float = 0.5,
target_update_interval: int = 8000,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[DiscreteBCQImpl] = None,
**kwargs: Any
):
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=None,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -406,7 +408,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
action_flexibility=self._action_flexibility,
beta=self._beta,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
reward_scaler=self._reward_scaler,
)
self._impl.build()
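The same split applies to BCQ: the continuous variant forwards all three scalers, while DiscreteBCQ has no action_scaler parameter and passes action_scaler=None to the base class (see the super().__init__ call above). A sketch with illustrative values:

from d3rlpy.algos import BCQ, DiscreteBCQ

bcq = BCQ(
    observation_scaler="min_max",
    action_scaler="min_max",
    reward_scaler="clip",
)

discrete_bcq = DiscreteBCQ(
    observation_scaler="min_max",
    reward_scaler="clip",
)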
13 changes: 7 additions & 6 deletions d3rlpy/algos/bear.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -112,8 +112,9 @@ class BEAR(AlgoBase):
function.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device iD or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The avaiable options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The avaiable options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The avaiable options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -186,7 +187,7 @@ def __init__(
vae_kl_weight: float = 0.5,
warmup_steps: int = 40000,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[BEARImpl] = None,
@@ -195,7 +196,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -262,7 +263,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
mmd_sigma=self._mmd_sigma,
vae_kl_weight=self._vae_kl_weight,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
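BEAR follows the same rename; per the docstring above, observation_scaler accepts either a d3rlpy.preprocessing.ObservationScaler instance or one of the listed strings. A string-based sketch with illustrative values:

from d3rlpy.algos import BEAR

bear = BEAR(
    observation_scaler="standard",  # or a d3rlpy.preprocessing.ObservationScaler instance
    action_scaler="min_max",
    reward_scaler="standard",
)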
24 changes: 13 additions & 11 deletions d3rlpy/algos/cql.py
@@ -3,9 +3,9 @@
from ..argument_utility import (
ActionScalerArg,
EncoderArg,
+ ObservationScalerArg,
QFuncArg,
RewardScalerArg,
- ScalerArg,
UseGPUArg,
check_encoder,
check_q_func,
@@ -98,8 +98,9 @@ class CQL(AlgoBase):
soft_q_backup (bool): flag to use SAC-style backup.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`.
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
action_scaler (d3rlpy.preprocessing.ActionScaler or str):
action preprocessor. The available options are ``['min_max']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
@@ -156,7 +157,7 @@ def __init__(
n_action_samples: int = 10,
soft_q_backup: bool = False,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
action_scaler: ActionScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[CQLImpl] = None,
@@ -165,7 +166,7 @@
super().__init__(
batch_size=batch_size,
gamma=gamma,
- scaler=scaler,
+ observation_scaler=observation_scaler,
action_scaler=action_scaler,
reward_scaler=reward_scaler,
kwargs=kwargs,
@@ -217,7 +218,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_action_samples=self._n_action_samples,
soft_q_backup=self._soft_q_backup,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
action_scaler=self._action_scaler,
reward_scaler=self._reward_scaler,
)
@@ -291,8 +292,9 @@ class DiscreteCQL(DoubleDQN):
alpha (float): the :math:`\alpha` value above.
use_gpu (bool, int or d3rlpy.gpu.Device):
flag to use GPU, device ID or device.
- scaler (d3rlpy.preprocessing.Scaler or str): preprocessor.
- The available options are `['pixel', 'min_max', 'standard']`
+ observation_scaler (d3rlpy.preprocessing.ObservationScaler or str):
+ observation preprocessor. The available options are
+ ``['pixel', 'min_max', 'standard']``.
reward_scaler (d3rlpy.preprocessing.RewardScaler or str):
reward preprocessor. The available options are
``['clip', 'min_max', 'standard']``.
@@ -317,7 +319,7 @@ def __init__(
target_update_interval: int = 8000,
alpha: float = 1.0,
use_gpu: UseGPUArg = False,
- scaler: ScalerArg = None,
+ observation_scaler: ObservationScalerArg = None,
reward_scaler: RewardScalerArg = None,
impl: Optional[DiscreteCQLImpl] = None,
**kwargs: Any,
@@ -332,7 +334,7 @@
n_critics=n_critics,
target_update_interval=target_update_interval,
use_gpu=use_gpu,
- scaler=scaler,
+ observation_scaler=observation_scaler,
reward_scaler=reward_scaler,
impl=impl,
**kwargs,
@@ -351,7 +353,7 @@ def _create_impl(self, observation_shape: Shape, action_size: int) -> None:
n_critics=self._n_critics,
alpha=self._alpha,
use_gpu=self._use_gpu,
- scaler=self._scaler,
+ observation_scaler=self._observation_scaler,
reward_scaler=self._reward_scaler,
)
self._impl.build()
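DiscreteCQL inherits from DoubleDQN and simply forwards observation_scaler through super().__init__, as the diff above shows; downstream, _create_impl now reads self._observation_scaler instead of self._scaler. A short sketch with illustrative values:

from d3rlpy.algos import DiscreteCQL

discrete_cql = DiscreteCQL(
    observation_scaler="pixel",
    reward_scaler="standard",
)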
