Upgrade code to Python 3.7+ syntax using pyupgrade (DLR-RM#887)
* Upgrade code to Python 3.7+ syntax

* Update changelog
araffin authored Apr 25, 2022
1 parent 061841a commit a6f5049
Showing 44 changed files with 129 additions and 129 deletions.
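
The rewrites below fall into a handful of mechanical patterns that `pyupgrade` applies when targeting Python 3.7+: two-argument `super(Class, self)` calls become zero-argument `super()`, the redundant `"r"` mode is dropped from `open()` calls, explicit `(object)` bases and `# -*- coding: utf-8 -*-` cookies are removed, and list comprehensions passed to `sum()` become generator expressions. Short illustrative sketches of each pattern follow the relevant file diffs below. (The tool is typically invoked as `pyupgrade --py37-plus <files>`, often via pre-commit; the exact invocation used for this commit is not shown here and is an assumption.)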
3 changes: 1 addition & 2 deletions docs/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
@@ -46,7 +45,7 @@ def __getattr__(cls, name):

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "../stable_baselines3", "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
__version__ = file_handler.read().strip()

# -- Project information -----------------------------------------------------
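
Both edits in `docs/conf.py` are representative of the commit: Python 3 source files are UTF-8 by default (PEP 3120), which makes the `# -*- coding: utf-8 -*-` cookie redundant, and `"r"` is already `open()`'s default mode. A minimal sketch of the `open()` equivalence (the file name below is hypothetical, created just for the demo):

```python
from pathlib import Path

Path("version.txt").write_text("1.5.1a5\n")  # hypothetical file for the demo

# "r" (read, text mode) is the default, so both calls behave identically
with open("version.txt") as fh:           # post-pyupgrade form
    a = fh.read().strip()
with open("version.txt", "r") as fh:      # pre-change form with the redundant mode
    b = fh.read().strip()
assert a == b == "1.5.1a5"
```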
3 changes: 2 additions & 1 deletion docs/misc/changelog.rst
@@ -4,7 +4,7 @@ Changelog
==========


-Release 1.5.1a4 (WIP)
+Release 1.5.1a5 (WIP)
---------------------------

Breaking Changes:
@@ -31,6 +31,7 @@ Deprecations:

Others:
^^^^^^^
+- Upgraded to Python 3.7+ syntax using ``pyupgrade``

Documentation:
^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

from setuptools import find_packages, setup

-with open(os.path.join("stable_baselines3", "version.txt"), "r") as file_handler:
+with open(os.path.join("stable_baselines3", "version.txt")) as file_handler:
__version__ = file_handler.read().strip()


2 changes: 1 addition & 1 deletion stable_baselines3/__init__.py
@@ -11,7 +11,7 @@

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
__version__ = file_handler.read().strip()


4 changes: 2 additions & 2 deletions stable_baselines3/a2c/a2c.py
@@ -82,7 +82,7 @@ def __init__(
_init_setup_model: bool = True,
):

-super(A2C, self).__init__(
+super().__init__(
policy,
env,
learning_rate=learning_rate,
@@ -194,7 +194,7 @@ def learn(
reset_num_timesteps: bool = True,
) -> "A2C":

-return super(A2C, self).learn(
+return super().learn(
total_timesteps=total_timesteps,
callback=callback,
log_interval=log_interval,
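
The `super(A2C, self)` → `super()` rewrite seen in `a2c.py` (and in most files below) relies on Python 3's zero-argument `super()`, which resolves the enclosing class through the implicit `__class__` closure cell; behavior is unchanged in ordinary method bodies. A minimal sketch with hypothetical class names:

```python
class BaseAlgorithm:
    def __init__(self, learning_rate: float):
        self.learning_rate = learning_rate

class A2CLike(BaseAlgorithm):  # hypothetical stand-in, not the real A2C
    def __init__(self, learning_rate: float = 7e-4):
        # Same as super(A2CLike, self).__init__(...), but the class name is
        # not repeated, so renaming the class cannot desync the call.
        super().__init__(learning_rate=learning_rate)

assert A2CLike().learning_rate == 7e-4
```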
2 changes: 1 addition & 1 deletion stable_baselines3/common/atari_wrappers.py
@@ -245,4 +245,4 @@ def __init__(
if clip_reward:
env = ClipRewardEnv(env)

-super(AtariWrapper, self).__init__(env)
+super().__init__(env)
8 changes: 4 additions & 4 deletions stable_baselines3/common/buffers.py
@@ -42,7 +42,7 @@ def __init__(
device: Union[th.device, str] = "cpu",
n_envs: int = 1,
):
-super(BaseBuffer, self).__init__()
+super().__init__()
self.buffer_size = buffer_size
self.observation_space = observation_space
self.action_space = action_space
@@ -179,7 +179,7 @@ def __init__(
optimize_memory_usage: bool = False,
handle_timeout_termination: bool = True,
):
-super(ReplayBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
+super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

# Adjust buffer size
self.buffer_size = max(buffer_size // n_envs, 1)
@@ -339,7 +339,7 @@ def __init__(
n_envs: int = 1,
):

-super(RolloutBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
+super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
self.gae_lambda = gae_lambda
self.gamma = gamma
self.observations, self.actions, self.rewards, self.advantages = None, None, None, None
@@ -358,7 +358,7 @@ def reset(self) -> None:
self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
self.generator_ready = False
-super(RolloutBuffer, self).reset()
+super().reset()

def compute_returns_and_advantage(self, last_values: th.Tensor, dones: np.ndarray) -> None:
"""
22 changes: 11 additions & 11 deletions stable_baselines3/common/callbacks.py
@@ -19,7 +19,7 @@ class BaseCallback(ABC):
"""

def __init__(self, verbose: int = 0):
-super(BaseCallback, self).__init__()
+super().__init__()
# The RL model
self.model = None # type: Optional[base_class.BaseAlgorithm]
# An alias for self.model.get_env(), the environment used for training
@@ -127,14 +127,14 @@ class EventCallback(BaseCallback):
"""

def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
-super(EventCallback, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.callback = callback
# Give access to the parent
if callback is not None:
self.callback.parent = self

def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
-super(EventCallback, self).init_callback(model)
+super().init_callback(model)
if self.callback is not None:
self.callback.init_callback(self.model)

@@ -169,7 +169,7 @@ class CallbackList(BaseCallback):
"""

def __init__(self, callbacks: List[BaseCallback]):
-super(CallbackList, self).__init__()
+super().__init__()
assert isinstance(callbacks, list)
self.callbacks = callbacks

@@ -228,7 +228,7 @@ class CheckpointCallback(BaseCallback):
"""

def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
-super(CheckpointCallback, self).__init__(verbose)
+super().__init__(verbose)
self.save_freq = save_freq
self.save_path = save_path
self.name_prefix = name_prefix
@@ -256,7 +256,7 @@ class ConvertCallback(BaseCallback):
"""

def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
-super(ConvertCallback, self).__init__(verbose)
+super().__init__(verbose)
self.callback = callback

def _on_step(self) -> bool:
@@ -307,7 +307,7 @@ def __init__(
verbose: int = 1,
warn: bool = True,
):
-super(EvalCallback, self).__init__(callback_after_eval, verbose=verbose)
+super().__init__(callback_after_eval, verbose=verbose)

self.callback_on_new_best = callback_on_new_best
if self.callback_on_new_best is not None:
@@ -480,7 +480,7 @@ class StopTrainingOnRewardThreshold(BaseCallback):
"""

def __init__(self, reward_threshold: float, verbose: int = 0):
-super(StopTrainingOnRewardThreshold, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.reward_threshold = reward_threshold

def _on_step(self) -> bool:
@@ -505,7 +505,7 @@ class EveryNTimesteps(EventCallback):
"""

def __init__(self, n_steps: int, callback: BaseCallback):
-super(EveryNTimesteps, self).__init__(callback)
+super().__init__(callback)
self.n_steps = n_steps
self.last_time_trigger = 0

@@ -528,7 +528,7 @@ class StopTrainingOnMaxEpisodes(BaseCallback):
"""

def __init__(self, max_episodes: int, verbose: int = 0):
-super(StopTrainingOnMaxEpisodes, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.max_episodes = max_episodes
self._total_max_episodes = max_episodes
self.n_episodes = 0
@@ -573,7 +573,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):
"""

def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
-super(StopTrainingOnNoModelImprovement, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.max_no_improvement_evals = max_no_improvement_evals
self.min_evals = min_evals
self.last_best_mean_reward = -np.inf
22 changes: 11 additions & 11 deletions stable_baselines3/common/distributions.py
@@ -16,7 +16,7 @@ class Distribution(ABC):
"""Abstract base class for distributions."""

def __init__(self):
-super(Distribution, self).__init__()
+super().__init__()
self.distribution = None

@abstractmethod
@@ -120,7 +120,7 @@ class DiagGaussianDistribution(Distribution):
"""

def __init__(self, action_dim: int):
-super(DiagGaussianDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim
self.mean_actions = None
self.log_std = None
@@ -201,13 +201,13 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
"""

def __init__(self, action_dim: int, epsilon: float = 1e-6):
-super(SquashedDiagGaussianDistribution, self).__init__(action_dim)
+super().__init__(action_dim)
# Avoid NaN (prevents division by zero or log of zero)
self.epsilon = epsilon
self.gaussian_actions = None

def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor) -> "SquashedDiagGaussianDistribution":
-super(SquashedDiagGaussianDistribution, self).proba_distribution(mean_actions, log_std)
+super().proba_distribution(mean_actions, log_std)
return self

def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = None) -> th.Tensor:
@@ -219,7 +219,7 @@ def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = N
gaussian_actions = TanhBijector.inverse(actions)

# Log likelihood for a Gaussian distribution
-log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
+log_prob = super().log_prob(gaussian_actions)
# Squash correction (from original SAC implementation)
# this comes from the fact that tanh is bijective and differentiable
log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
@@ -254,7 +254,7 @@ class CategoricalDistribution(Distribution):
"""

def __init__(self, action_dim: int):
-super(CategoricalDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -305,7 +305,7 @@ class MultiCategoricalDistribution(Distribution):
"""

def __init__(self, action_dims: List[int]):
-super(MultiCategoricalDistribution, self).__init__()
+super().__init__()
self.action_dims = action_dims

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -360,7 +360,7 @@ class BernoulliDistribution(Distribution):
"""

def __init__(self, action_dims: int):
-super(BernoulliDistribution, self).__init__()
+super().__init__()
self.action_dims = action_dims

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -433,7 +433,7 @@ def __init__(
learn_features: bool = False,
epsilon: float = 1e-6,
):
-super(StateDependentNoiseDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim
self.latent_sde_dim = None
self.mean_actions = None
@@ -597,7 +597,7 @@ def log_prob_from_params(
return actions, log_prob


-class TanhBijector(object):
+class TanhBijector:
"""
Bijective transformation of a probability distribution
using a squashing function (tanh)
@@ -607,7 +607,7 @@ class TanhBijector(object):
"""

def __init__(self, epsilon: float = 1e-6):
-super(TanhBijector, self).__init__()
+super().__init__()
self.epsilon = epsilon

@staticmethod
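
The `class TanhBijector(object)` → `class TanhBijector` change above is the one place this commit touches a class header: in Python 3 every class is new-style and inherits from `object` implicitly, so the explicit base adds nothing. A quick sketch of the equivalence:

```python
class Explicit(object):  # Python 2-era spelling
    pass

class Implicit:  # identical semantics in Python 3
    pass

# object sits at the root of both MROs either way
assert Explicit.__mro__[-1] is object
assert Implicit.__mro__[-1] is object
```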
6 changes: 3 additions & 3 deletions stable_baselines3/common/envs/bit_flipping_env.py
@@ -36,7 +36,7 @@ def __init__(
image_obs_space: bool = False,
channel_first: bool = True,
):
-super(BitFlippingEnv, self).__init__()
+super().__init__()
# Shape of the observation when using image space
self.image_shape = (1, 36, 36) if channel_first else (36, 36, 1)
# The achieved goal is determined by the current state
@@ -115,7 +115,7 @@ def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
if self.discrete_obs_space:
# The internal state is the binary representation of the
# observed one
-return int(sum([state[i] * 2**i for i in range(len(state))]))
+return int(sum(state[i] * 2**i for i in range(len(state))))

if self.image_obs_space:
size = np.prod(self.image_shape)
@@ -135,7 +135,7 @@ def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int)
if isinstance(state, int):
state = np.array(state).reshape(batch_size, -1)
# Convert to binary representation
-state = (((state[:, :] & (1 << np.arange(len(self.state))))) > 0).astype(int)
+state = ((state[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
elif self.image_obs_space:
state = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
else:
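
Besides the `super()` rewrite, `bit_flipping_env.py` shows two smaller cleanups: `sum()` now consumes a generator expression instead of first building a list, and a doubled pair of parentheses is dropped. A short sketch of the `sum()` equivalence (the example bit vector is made up):

```python
import numpy as np

state = np.array([1, 0, 1, 1])  # hypothetical bit vector, lowest bit first

# Before: int(sum([state[i] * 2**i for i in range(len(state))]))
# After: without the brackets, sum() consumes the generator lazily and
# no intermediate list is allocated.
value = int(sum(state[i] * 2**i for i in range(len(state))))
assert value == 13  # 1 + 0 + 4 + 8
```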
2 changes: 1 addition & 1 deletion stable_baselines3/common/envs/multi_input_envs.py
@@ -42,7 +42,7 @@ def __init__(
discrete_actions: bool = True,
channel_last: bool = True,
):
-super(SimpleMultiObsEnv, self).__init__()
+super().__init__()

self.vector_size = 5
if channel_last: