Upgrade code to Python 3.7+ syntax using pyupgrade (DLR-RM#887)
* Upgrade code to Python 3.7+ syntax

* Update changelog
araffin authored Apr 25, 2022
1 parent 061841a commit a6f5049
Showing 44 changed files with 129 additions and 129 deletions.
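
The rewrites below fall into a handful of mechanical patterns that `pyupgrade` applies when targeting Python 3.7+: two-argument `super(Class, self)` calls become zero-argument `super()`, the redundant `"r"` mode is dropped from `open()` calls, explicit `(object)` bases and `# -*- coding: utf-8 -*-` cookies are removed, and list comprehensions passed to `sum()` become generator expressions. Short illustrative sketches of each pattern follow the relevant file diffs below. (The tool is typically invoked as `pyupgrade --py37-plus <files>`, often via pre-commit; the exact invocation used for this commit is not shown here and is an assumption.)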
3 changes: 1 addition & 2 deletions docs/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
@@ -46,7 +45,7 @@ def __getattr__(cls, name):

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "../stable_baselines3", "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
__version__ = file_handler.read().strip()

# -- Project information -----------------------------------------------------
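
Both edits in `docs/conf.py` are representative of the commit: Python 3 source files are UTF-8 by default (PEP 3120), which makes the `# -*- coding: utf-8 -*-` cookie redundant, and `"r"` is already `open()`'s default mode. A minimal sketch of the `open()` equivalence (the file name below is hypothetical, created just for the demo):

```python
from pathlib import Path

Path("version.txt").write_text("1.5.1a5\n")  # hypothetical file for the demo

# "r" (read, text mode) is the default, so both calls behave identically
with open("version.txt") as fh:           # post-pyupgrade form
    a = fh.read().strip()
with open("version.txt", "r") as fh:      # pre-change form with the redundant mode
    b = fh.read().strip()
assert a == b == "1.5.1a5"
```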
3 changes: 2 additions & 1 deletion docs/misc/changelog.rst
@@ -4,7 +4,7 @@ Changelog
==========


-Release 1.5.1a4 (WIP)
+Release 1.5.1a5 (WIP)
---------------------------

Breaking Changes:
@@ -31,6 +31,7 @@ Deprecations:

Others:
^^^^^^^
+- Upgraded to Python 3.7+ syntax using ``pyupgrade``

Documentation:
^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

from setuptools import find_packages, setup

-with open(os.path.join("stable_baselines3", "version.txt"), "r") as file_handler:
+with open(os.path.join("stable_baselines3", "version.txt")) as file_handler:
__version__ = file_handler.read().strip()


2 changes: 1 addition & 1 deletion stable_baselines3/__init__.py
@@ -11,7 +11,7 @@

# Read version from file
version_file = os.path.join(os.path.dirname(__file__), "version.txt")
-with open(version_file, "r") as file_handler:
+with open(version_file) as file_handler:
__version__ = file_handler.read().strip()


4 changes: 2 additions & 2 deletions stable_baselines3/a2c/a2c.py
@@ -82,7 +82,7 @@ def __init__(
_init_setup_model: bool = True,
):

-super(A2C, self).__init__(
+super().__init__(
policy,
env,
learning_rate=learning_rate,
@@ -194,7 +194,7 @@ def learn(
reset_num_timesteps: bool = True,
) -> "A2C":

-return super(A2C, self).learn(
+return super().learn(
total_timesteps=total_timesteps,
callback=callback,
log_interval=log_interval,
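
The `super(A2C, self)` → `super()` rewrite seen in `a2c.py` (and in most files below) relies on Python 3's zero-argument `super()`, which resolves the enclosing class through the implicit `__class__` closure cell; behavior is unchanged in ordinary method bodies. A minimal sketch with hypothetical class names:

```python
class BaseAlgorithm:
    def __init__(self, learning_rate: float):
        self.learning_rate = learning_rate

class A2CLike(BaseAlgorithm):  # hypothetical stand-in, not the real A2C
    def __init__(self, learning_rate: float = 7e-4):
        # Same as super(A2CLike, self).__init__(...), but the class name is
        # not repeated, so renaming the class cannot desync the call.
        super().__init__(learning_rate=learning_rate)

assert A2CLike().learning_rate == 7e-4
```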
2 changes: 1 addition & 1 deletion stable_baselines3/common/atari_wrappers.py
@@ -245,4 +245,4 @@ def __init__(
if clip_reward:
env = ClipRewardEnv(env)

-super(AtariWrapper, self).__init__(env)
+super().__init__(env)
8 changes: 4 additions & 4 deletions stable_baselines3/common/buffers.py
@@ -42,7 +42,7 @@ def __init__(
device: Union[th.device, str] = "cpu",
n_envs: int = 1,
):
-super(BaseBuffer, self).__init__()
+super().__init__()
self.buffer_size = buffer_size
self.observation_space = observation_space
self.action_space = action_space
@@ -179,7 +179,7 @@ def __init__(
optimize_memory_usage: bool = False,
handle_timeout_termination: bool = True,
):
-super(ReplayBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
+super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

# Adjust buffer size
self.buffer_size = max(buffer_size // n_envs, 1)
@@ -339,7 +339,7 @@ def __init__(
n_envs: int = 1,
):

-super(RolloutBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
+super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
self.gae_lambda = gae_lambda
self.gamma = gamma
self.observations, self.actions, self.rewards, self.advantages = None, None, None, None
@@ -358,7 +358,7 @@ def reset(self) -> None:
self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
self.generator_ready = False
-super(RolloutBuffer, self).reset()
+super().reset()

def compute_returns_and_advantage(self, last_values: th.Tensor, dones: np.ndarray) -> None:
"""
22 changes: 11 additions & 11 deletions stable_baselines3/common/callbacks.py
@@ -19,7 +19,7 @@ class BaseCallback(ABC):
"""

def __init__(self, verbose: int = 0):
-super(BaseCallback, self).__init__()
+super().__init__()
# The RL model
self.model = None # type: Optional[base_class.BaseAlgorithm]
# An alias for self.model.get_env(), the environment used for training
@@ -127,14 +127,14 @@ class EventCallback(BaseCallback):
"""

def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
-super(EventCallback, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.callback = callback
# Give access to the parent
if callback is not None:
self.callback.parent = self

def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
-super(EventCallback, self).init_callback(model)
+super().init_callback(model)
if self.callback is not None:
self.callback.init_callback(self.model)

@@ -169,7 +169,7 @@ class CallbackList(BaseCallback):
"""

def __init__(self, callbacks: List[BaseCallback]):
-super(CallbackList, self).__init__()
+super().__init__()
assert isinstance(callbacks, list)
self.callbacks = callbacks

@@ -228,7 +228,7 @@ class CheckpointCallback(BaseCallback):
"""

def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
-super(CheckpointCallback, self).__init__(verbose)
+super().__init__(verbose)
self.save_freq = save_freq
self.save_path = save_path
self.name_prefix = name_prefix
@@ -256,7 +256,7 @@ class ConvertCallback(BaseCallback):
"""

def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
-super(ConvertCallback, self).__init__(verbose)
+super().__init__(verbose)
self.callback = callback

def _on_step(self) -> bool:
@@ -307,7 +307,7 @@ def __init__(
verbose: int = 1,
warn: bool = True,
):
-super(EvalCallback, self).__init__(callback_after_eval, verbose=verbose)
+super().__init__(callback_after_eval, verbose=verbose)

self.callback_on_new_best = callback_on_new_best
if self.callback_on_new_best is not None:
@@ -480,7 +480,7 @@ class StopTrainingOnRewardThreshold(BaseCallback):
"""

def __init__(self, reward_threshold: float, verbose: int = 0):
-super(StopTrainingOnRewardThreshold, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.reward_threshold = reward_threshold

def _on_step(self) -> bool:
@@ -505,7 +505,7 @@ class EveryNTimesteps(EventCallback):
"""

def __init__(self, n_steps: int, callback: BaseCallback):
-super(EveryNTimesteps, self).__init__(callback)
+super().__init__(callback)
self.n_steps = n_steps
self.last_time_trigger = 0

@@ -528,7 +528,7 @@ class StopTrainingOnMaxEpisodes(BaseCallback):
"""

def __init__(self, max_episodes: int, verbose: int = 0):
-super(StopTrainingOnMaxEpisodes, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.max_episodes = max_episodes
self._total_max_episodes = max_episodes
self.n_episodes = 0
@@ -573,7 +573,7 @@ class StopTrainingOnNoModelImprovement(BaseCallback):
"""

def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
-super(StopTrainingOnNoModelImprovement, self).__init__(verbose=verbose)
+super().__init__(verbose=verbose)
self.max_no_improvement_evals = max_no_improvement_evals
self.min_evals = min_evals
self.last_best_mean_reward = -np.inf
22 changes: 11 additions & 11 deletions stable_baselines3/common/distributions.py
@@ -16,7 +16,7 @@ class Distribution(ABC):
"""Abstract base class for distributions."""

def __init__(self):
-super(Distribution, self).__init__()
+super().__init__()
self.distribution = None

@abstractmethod
@@ -120,7 +120,7 @@ class DiagGaussianDistribution(Distribution):
"""

def __init__(self, action_dim: int):
-super(DiagGaussianDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim
self.mean_actions = None
self.log_std = None
@@ -201,13 +201,13 @@ class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
"""

def __init__(self, action_dim: int, epsilon: float = 1e-6):
-super(SquashedDiagGaussianDistribution, self).__init__(action_dim)
+super().__init__(action_dim)
# Avoid NaN (prevents division by zero or log of zero)
self.epsilon = epsilon
self.gaussian_actions = None

def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor) -> "SquashedDiagGaussianDistribution":
-super(SquashedDiagGaussianDistribution, self).proba_distribution(mean_actions, log_std)
+super().proba_distribution(mean_actions, log_std)
return self

def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = None) -> th.Tensor:
@@ -219,7 +219,7 @@ def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = N
gaussian_actions = TanhBijector.inverse(actions)

# Log likelihood for a Gaussian distribution
-log_prob = super(SquashedDiagGaussianDistribution, self).log_prob(gaussian_actions)
+log_prob = super().log_prob(gaussian_actions)
# Squash correction (from original SAC implementation)
# this comes from the fact that tanh is bijective and differentiable
log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
@@ -254,7 +254,7 @@ class CategoricalDistribution(Distribution):
"""

def __init__(self, action_dim: int):
-super(CategoricalDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -305,7 +305,7 @@ class MultiCategoricalDistribution(Distribution):
"""

def __init__(self, action_dims: List[int]):
-super(MultiCategoricalDistribution, self).__init__()
+super().__init__()
self.action_dims = action_dims

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -360,7 +360,7 @@ class BernoulliDistribution(Distribution):
"""

def __init__(self, action_dims: int):
-super(BernoulliDistribution, self).__init__()
+super().__init__()
self.action_dims = action_dims

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -433,7 +433,7 @@ def __init__(
learn_features: bool = False,
epsilon: float = 1e-6,
):
-super(StateDependentNoiseDistribution, self).__init__()
+super().__init__()
self.action_dim = action_dim
self.latent_sde_dim = None
self.mean_actions = None
@@ -597,7 +597,7 @@ def log_prob_from_params(
return actions, log_prob


-class TanhBijector(object):
+class TanhBijector:
"""
Bijective transformation of a probability distribution
using a squashing function (tanh)
@@ -607,7 +607,7 @@ class TanhBijector(object):
"""

def __init__(self, epsilon: float = 1e-6):
-super(TanhBijector, self).__init__()
+super().__init__()
self.epsilon = epsilon

@staticmethod
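
The `class TanhBijector(object)` → `class TanhBijector` change above is the one place this commit touches a class header: in Python 3 every class is new-style and inherits from `object` implicitly, so the explicit base adds nothing. A quick sketch of the equivalence:

```python
class Explicit(object):  # Python 2-era spelling
    pass

class Implicit:  # identical semantics in Python 3
    pass

# object sits at the root of both MROs either way
assert Explicit.__mro__[-1] is object
assert Implicit.__mro__[-1] is object
```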
6 changes: 3 additions & 3 deletions stable_baselines3/common/envs/bit_flipping_env.py
@@ -36,7 +36,7 @@ def __init__(
image_obs_space: bool = False,
channel_first: bool = True,
):
-super(BitFlippingEnv, self).__init__()
+super().__init__()
# Shape of the observation when using image space
self.image_shape = (1, 36, 36) if channel_first else (36, 36, 1)
# The achieved goal is determined by the current state
@@ -115,7 +115,7 @@ def convert_if_needed(self, state: np.ndarray) -> Union[int, np.ndarray]:
if self.discrete_obs_space:
# The internal state is the binary representation of the
# observed one
-return int(sum([state[i] * 2**i for i in range(len(state))]))
+return int(sum(state[i] * 2**i for i in range(len(state))))

if self.image_obs_space:
size = np.prod(self.image_shape)
@@ -135,7 +135,7 @@ def convert_to_bit_vector(self, state: Union[int, np.ndarray], batch_size: int)
if isinstance(state, int):
state = np.array(state).reshape(batch_size, -1)
# Convert to binary representation
-state = (((state[:, :] & (1 << np.arange(len(self.state))))) > 0).astype(int)
+state = ((state[:, :] & (1 << np.arange(len(self.state)))) > 0).astype(int)
elif self.image_obs_space:
state = state.reshape(batch_size, -1)[:, : len(self.state)] / 255
else:
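
Besides the `super()` rewrite, `bit_flipping_env.py` shows two smaller cleanups: `sum()` now consumes a generator expression instead of first building a list, and a doubled pair of parentheses is dropped. A short sketch of the `sum()` equivalence (the example bit vector is made up):

```python
import numpy as np

state = np.array([1, 0, 1, 1])  # hypothetical bit vector, lowest bit first

# Before: int(sum([state[i] * 2**i for i in range(len(state))]))
# After: without the brackets, sum() consumes the generator lazily and
# no intermediate list is allocated.
value = int(sum(state[i] * 2**i for i in range(len(state))))
assert value == 13  # 1 + 0 + 4 + 8
```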
2 changes: 1 addition & 1 deletion stable_baselines3/common/envs/multi_input_envs.py
@@ -42,7 +42,7 @@ def __init__(
discrete_actions: bool = True,
channel_last: bool = True,
):
-super(SimpleMultiObsEnv, self).__init__()
+super().__init__()

self.vector_size = 5
if channel_last: