From bed8016e82d24b8eea8894d660a78da03306549f Mon Sep 17 00:00:00 2001 From: PaulDaoudi Date: Sat, 27 Jan 2024 18:12:58 +0100 Subject: [PATCH] Rllg branch (#68) * Add dosctrings, types and docker * Add dosctrings, types and docker --- .gitignore | 1 + RLLG/.dockerignore | 9 + RLLG/LICENSE | 40 +- RLLG/README.md | 106 +- RLLG/agents/algos/pag.py | 636 +++++++----- RLLG/agents/algos/pig.py | 575 ++++++----- RLLG/agents/algos/sac.py | 473 +++++---- RLLG/agents/algos/sag.py | 531 +++++----- RLLG/agents/common/config.py | 67 ++ RLLG/agents/common/creation_utils.py | 150 +++ RLLG/agents/common/model.py | 950 ++++++++++++------ RLLG/agents/common/replay_buffer.py | 583 +++++++---- RLLG/agents/common/sampler.py | 466 +++++---- RLLG/agents/common/utils.py | 187 ++-- RLLG/agents/common/visualization_helpers.py | 285 +++--- RLLG/docker/Dockerfile | 29 + RLLG/envs/ball_in_cup/confidence.py | 121 ++- RLLG/envs/ball_in_cup/create_ball_in_cup.py | 132 ++- RLLG/envs/ball_in_cup/local_expert_policy.py | 121 ++- .../ball_in_cup/models/near_expert_sac_650 | Bin 0 -> 22735 bytes .../ball_in_cup/models/near_expert_sac_780 | Bin 0 -> 22735 bytes .../bullet_small_reach/bullet_small_reach.py | 110 +- RLLG/envs/bullet_small_reach/confidence.py | 115 ++- .../create_bullet_small_reach.py | 88 +- .../bullet_small_reach/local_expert_policy.py | 152 ++- RLLG/envs/cartpole/confidence.py | 137 ++- RLLG/envs/cartpole/create_cartpole.py | 174 ++-- RLLG/envs/cartpole/local_expert_policy.py | 68 +- RLLG/envs/confidence.py | 107 +- RLLG/envs/creation.py | 158 +-- RLLG/envs/env_utils.py | 171 ++-- RLLG/envs/hirl_point_fall/confidence.py | 104 +- .../hirl_point_fall/create_hirl_point_fall.py | 92 +- .../hirl_point_fall/local_expert_policy.py | 122 ++- RLLG/envs/hirl_point_fall/wrapper.py | 109 +- RLLG/envs/point_circle/confidence.py | 105 +- RLLG/envs/point_circle/create_point_circle.py | 103 +- RLLG/envs/point_circle/local_expert_policy.py | 121 ++- RLLG/envs/point_circle/point_circle.py | 106 +- RLLG/envs/point_mass/confidence.py | 120 ++- RLLG/envs/point_mass/create_point_mass.py | 139 ++- RLLG/envs/point_mass/local_expert_policy.py | 121 ++- RLLG/main.py | 260 ++--- RLLG/notebooks/Visualization.ipynb | 308 +++--- RLLG/notebooks/helpers.py | 145 ++- RLLG/notebooks/video_fn.py | 117 ++- RLLG/ray_config/ball_in_cup_cfg.yaml | 130 +-- RLLG/ray_config/bullet_small_reach_cfg.yaml | 142 +-- RLLG/ray_config/cartpole_cfg.yaml | 162 +-- RLLG/ray_config/hirl_point_fall_cfg.yaml | 132 +-- RLLG/ray_config/point_circle_cfg.yaml | 142 +-- RLLG/ray_config/point_mass_cfg.yaml | 132 +-- RLLG/requirements.txt | 23 + RLLG/sac_main_fn.py | 183 ++-- RLLG/setup.py | 62 +- 55 files changed, 6175 insertions(+), 3747 deletions(-) create mode 100644 RLLG/.dockerignore create mode 100644 RLLG/agents/common/config.py create mode 100644 RLLG/agents/common/creation_utils.py create mode 100644 RLLG/docker/Dockerfile create mode 100644 RLLG/envs/ball_in_cup/models/near_expert_sac_650 create mode 100644 RLLG/envs/ball_in_cup/models/near_expert_sac_780 create mode 100644 RLLG/requirements.txt diff --git a/.gitignore b/.gitignore index 85e7c1df..51429500 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /.idea/ +*.DS_Store diff --git a/RLLG/.dockerignore b/RLLG/.dockerignore new file mode 100644 index 00000000..bd17c07d --- /dev/null +++ b/RLLG/.dockerignore @@ -0,0 +1,9 @@ +__pycache__/ +ray_results +.idea/ +logs/ +.pytype/ +.vscode/ +# ignore for docker builds +.git/ +.mypy_cache/ \ No newline at end of file diff --git a/RLLG/LICENSE b/RLLG/LICENSE 
index 4ad327d9..e341872e 100644 --- a/RLLG/LICENSE +++ b/RLLG/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2023 Paul Daoudi - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +MIT License + +Copyright (c) 2023 Paul Daoudi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/RLLG/README.md b/RLLG/README.md index 8649898e..c7859575 100644 --- a/RLLG/README.md +++ b/RLLG/README.md @@ -1,49 +1,57 @@ -# Enhancing Reinforcement Learning agents with Local Guides - -This is the official implementation of the techniques discussed in the paper [Enhancing Reinforcement Learning agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf). - -## Steps to launch it for a new environment - -- In the folder `envs`, create a folder with the name of the environment with 3 files: - - `create_env_name` to create the environment - - `local_expert_policy` for the local expert - - `confidence` for the confidence function $\lambda$ -- Add the environment in the global files `creation` and `confidence` in `envs` -- Add a config file in `ray_config` -- Modify the `main` file to include the new environment -- Enjoy :) - -## Notes regarding the Point-Reach environment - -PointReach is based on [Bullet-Safety-Gym](https://github.com/SvenGronauer/Bullet-Safety-Gym), and has been modified internally (directly in their source code) to make it more difficult. - -All the details can be found in Appendix B of the paper. 
- -## Visualization - -All the results are saved in a ray tune `Experimentanalysis`. You can plot them in the `Visualization.ipynb` notebook. - -## License - -We follow MIT License. Please see the [License](./LICENSE) file for more information. - -**Disclaimer:** This is not an officially supported Huawei Product. - - -## Credits - -This code is built upon the [SimpleSAC Github](https://github.com/young-geng/SimpleSAC), and some wrappers of [gym](https://github.com/openai/gym/tree/master). - - -## Cite us - -If you find this technique useful and you use it in a project, please cite it: -``` -@inproceedings{daoudi2023enhancing, - title={Enhancing Reinforcement Learning Agents with Local Guides}, - author={Daoudi, Paul and Robu, Bogdan and Prieur, Christophe and Dos Santos, Ludovic and Barlier, Merwan}, - booktitle={Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems}, - pages={829--838}, - year={2023} -} -``` +# Enhancing Reinforcement Learning agents with Local Guides + +This is the official implementation of the techniques discussed in the paper [Enhancing Reinforcement Learning agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf). + +## Create the conda virtual environment + +``` +conda create --name rllg python=3.8 +conda activate rllg +pip install -e . +``` + +## Steps to launch it for a new environment + +- In the folder `envs`, create a folder with the name of the environment with 3 files: + - `create_env_name` to create the environment + - `local_expert_policy` for the local expert + - `confidence` for the confidence function $\lambda$ +- Add the environment in the global files `creation` and `confidence` in `envs` +- Add a config file in `ray_config` +- Modify the `main` file to include the new environment +- Enjoy :) + +## Notes regarding the Point-Reach environment + +PointReach is based on [Bullet-Safety-Gym](https://github.com/SvenGronauer/Bullet-Safety-Gym), and has been modified internally (directly in their source code) to make it more difficult. + +All the details can be found in Appendix B of the paper. + +## Visualization + +All the results are saved in a ray tune `Experimentanalysis`. You can plot them in the `Visualization.ipynb` notebook. + +## License + +We follow MIT License. Please see the [License](./LICENSE) file for more information. + +**Disclaimer:** This is not an officially supported Huawei Product. + + +## Credits + +This code is built upon the [SimpleSAC Github](https://github.com/young-geng/SimpleSAC), and some wrappers of [gym](https://github.com/openai/gym/tree/master). + + +## Cite us + +If you find this technique useful and you use it in a project, please cite it: +``` +@inproceedings{daoudi2023enhancing, + title={Enhancing Reinforcement Learning Agents with Local Guides}, + author={Daoudi, Paul and Robu, Bogdan and Prieur, Christophe and Dos Santos, Ludovic and Barlier, Merwan}, + booktitle={Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems}, + pages={829--838}, + year={2023} +} +``` diff --git a/RLLG/agents/algos/pag.py b/RLLG/agents/algos/pag.py index 1bf65f3e..3f6106b9 100644 --- a/RLLG/agents/algos/pag.py +++ b/RLLG/agents/algos/pag.py @@ -1,260 +1,376 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
- - -from ml_collections import ConfigDict -import numpy as np -import torch -import torch.optim as optim -import torch.nn.functional as F -from agents.common.model import Scalar, soft_target_update - - -class PAG(object): - - @staticmethod - def get_default_config(updates=None): - config = ConfigDict() - config.discount = 0.99 - config.reward_scale = 1.0 - config.alpha_multiplier = 1.0 - config.use_automatic_entropy_tuning = True - config.use_automatic_entropy_tuning_parametrized_perturbation = True - config.backup_entropy = True - config.target_entropy = 0.0 - config.policy_lr = 3e-4 - config.qf_lr = 3e-4 - config.optimizer_type = 'adam' - config.soft_target_update_rate = 5e-3 - config.target_update_period = 1 - - if updates is not None: - config.update(ConfigDict(updates).copy_and_resolve_references()) - return config - - def __init__(self, config, policy, sampler_policy, qf1, qf2, target_qf1, target_qf2, - use_local, local_expert, parametrized_perturbation, sampler_parametrized_perturbation): - self.config = PAG.get_default_config(config) - self.policy = policy - self.sampler_policy = sampler_policy - self.qf1 = qf1 - self.qf2 = qf2 - self.target_qf1 = target_qf1 - self.target_qf2 = target_qf2 - self.parametrized_perturbation = parametrized_perturbation - self.sampler_parametrized_perturbation = sampler_parametrized_perturbation - - # hyperparams - self.use_local = use_local - self.beta = 1. - self.local_expert = local_expert - - optimizer_class = { - 'adam': optim.Adam, - 'sgd': optim.SGD, - }[self.config.optimizer_type] - - self.policy_optimizer = optimizer_class( - self.policy.parameters(), self.config.policy_lr, - ) - self.qf_optimizer = optimizer_class( - list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr - ) - self.parametrized_perturbation_optimizer = optimizer_class( - self.parametrized_perturbation.parameters(), self.config.policy_lr, - ) - - if self.config.use_automatic_entropy_tuning: - self.log_alpha = Scalar(0.0) - self.alpha_optimizer = optimizer_class( - self.log_alpha.parameters(), - lr=self.config.policy_lr, - ) - else: - self.log_alpha = None - - if self.config.use_automatic_entropy_tuning_parametrized_perturbation: - self.expert_log_alpha = Scalar(0.0) - self.expert_alpha_optimizer = optimizer_class( - self.expert_log_alpha.parameters(), - lr=self.config.policy_lr, - ) - else: - self.expert_log_alpha = None - - self.update_target_network(1.0) - self._total_steps = 0 - - def update_target_network(self, soft_target_update_rate): - soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) - soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) - - def train(self, batch, batch_success=None): - self._total_steps += 1 - - # classic obs - observations = batch['observations'] - actions = batch['actions'] - rewards = batch['rewards'] - next_observations = batch['next_observations'] - dones = batch['dones'] - - # retrieve local experts information - lambda_s_current = batch['use_local_current'] - lambda_s_next = batch['use_local_next'] - expert_actions = batch['expert_actions'] - next_expert_actions = batch['next_expert_actions'] - - new_actions, log_pi = self.policy(observations) - - if self.config.use_automatic_entropy_tuning: - alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() - alpha = self.log_alpha().exp() * self.config.alpha_multiplier - else: - alpha_loss = observations.new_tensor(0.0) - alpha = observations.new_tensor(self.config.alpha_multiplier) - - """ Policy loss """ - 
q_new_actions = torch.min( - self.qf1(observations, new_actions), - self.qf2(observations, new_actions), - ) - policy_loss = (alpha*log_pi - q_new_actions).mean() - - """ Parametrized noise loss""" - parametrized_perturbation_actions, expert_log_pi = self.parametrized_perturbation(observations, expert_actions) - - if self.config.use_automatic_entropy_tuning_parametrized_perturbation: - expert_alpha_loss = -(self.expert_log_alpha() * (expert_log_pi + self.config.target_entropy).detach()).mean() - expert_alpha = self.expert_log_alpha().exp() * self.config.expert_alpha_multiplier - else: - expert_alpha_loss = observations.new_tensor(0.0) - expert_alpha = observations.new_tensor(self.config.expert_alpha_multiplier) - - q_new_actions_perturbed = lambda_s_current * torch.min( - self.qf1(observations, parametrized_perturbation_actions), - self.qf2(observations, parametrized_perturbation_actions), - ) - parametrized_perturbation_loss = (expert_alpha * expert_log_pi - q_new_actions_perturbed).mean() - - """ Q function loss """ - q1_pred = self.qf1(observations, actions) - q2_pred = self.qf2(observations, actions) - - with torch.no_grad(): - new_next_actions, next_log_pi = self.policy(next_observations) - - next_log_pi = (1 - lambda_s_next) * next_log_pi - - expert_target_q_values = torch.min( - self.target_qf1(next_observations, next_expert_actions), - self.target_qf2(next_observations, next_expert_actions), - ) - classic_target_q_values = torch.min( - self.target_qf1(next_observations, new_next_actions), - self.target_qf2(next_observations, new_next_actions), - ) - target_q_values = lambda_s_next * expert_target_q_values + \ - (1 - lambda_s_next) * classic_target_q_values - - if self.config.backup_entropy: - target_q_values = target_q_values - alpha * next_log_pi - - q_target = self.config.reward_scale * rewards + (1. 
- dones) * self.config.discount * target_q_values - qf1_loss = F.mse_loss(q1_pred, q_target.detach()) - qf2_loss = F.mse_loss(q2_pred, q_target.detach()) - qf_loss = qf1_loss + qf2_loss - - if self.config.use_automatic_entropy_tuning: - self.alpha_optimizer.zero_grad() - alpha_loss.backward() - self.alpha_optimizer.step() - - if self.config.use_automatic_entropy_tuning_parametrized_perturbation: - self.expert_alpha_optimizer.zero_grad() - expert_alpha_loss.backward() - self.expert_alpha_optimizer.step() - - self.policy_optimizer.zero_grad() - policy_loss.backward() - self.policy_optimizer.step() - - self.parametrized_perturbation_optimizer.zero_grad() - parametrized_perturbation_loss.backward() - self.parametrized_perturbation_optimizer.step() - - self.qf_optimizer.zero_grad() - qf_loss.backward() - self.qf_optimizer.step() - - if self.total_steps % self.config.target_update_period == 0: - self.update_target_network( - self.config.soft_target_update_rate - ) - - metrics_to_return = dict( - log_pi=log_pi.mean().item(), - policy_loss=policy_loss.item(), - parametrized_perturbation_loss=parametrized_perturbation_loss.item(), - qf1_loss=qf1_loss.item(), - qf2_loss=qf2_loss.item(), - alpha_loss=alpha_loss.item(), - alpha=alpha.item(), - expert_alpha_loss=expert_alpha_loss.item(), - expert_alpha=expert_alpha.item(), - average_qf1=q1_pred.mean().item(), - average_qf2=q2_pred.mean().item(), - average_target_q=target_q_values.mean().item(), - total_steps=self.total_steps, - ) - - return metrics_to_return - - def torch_to_device(self, device): - for module in self.modules: - module.to(device) - - def get_action(self, - env, - observation, - deterministic=False, - add_local_information=False): - """ - In switched agent, the agent always picks the expert action if it is relevant. - """ - - action = self.sampler_policy( - np.expand_dims(observation, 0), deterministic=deterministic - )[0, :] - if add_local_information: - use_local = self.use_local.get_use_local(env, - observation) - expert_action_init = self.local_expert.get_action(observation, - init_action=action, - env=env) - expert_action = self.sampler_parametrized_perturbation( - np.expand_dims(observation, 0), np.expand_dims(expert_action_init, 0), - beta=self.beta, deterministic=deterministic - )[0, :] - if use_local: - return expert_action, use_local, expert_action - return action, use_local, expert_action - return action - - @property - def modules(self): - modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] - if self.config.use_automatic_entropy_tuning: - modules.append(self.log_alpha) - return modules - - @property - def total_steps(self): - return self._total_steps +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Optional, Tuple, Any, Dict, List, Union +from ml_collections import ConfigDict +import numpy as np +import torch +import torch.optim as optim +import torch.nn.functional as F +from agents.common.model import Scalar, soft_target_update, SamplerPolicy, ExpertSamplerPolicy + + +class PAG(object): + """ + Perturbed Action Guided (PAG) algorithm implementation. + + Parameters: + ----------- + config: dict + Configuration parameters for SAC. + policy: torch.nn.Module + The policy network. + sampler_policy: SamplerPolicy + The sampler policy network. 
+    qf1: torch.nn.Module
+        The first critic network.
+    qf2: torch.nn.Module
+        The second critic network.
+    target_qf1: torch.nn.Module
+        The target network for the first critic.
+    target_qf2: torch.nn.Module
+        The target network for the second critic.
+    use_local: float
+        Float confidence function indicating whether to trust local policies.
+    local_expert: Any
+        Local expert.
+    parametrized_perturbation: torch.nn.Module
+        The parametrized perturbation network.
+    sampler_parametrized_perturbation: ExpertSamplerPolicy
+        The network used for sampling perturbations.
+    """
+
+    @staticmethod
+    def get_default_config(updates: Optional[Dict] = None) -> ConfigDict:
+        """
+        Get the default configuration for PAG.
+
+        Parameters:
+        -----------
+        updates: dict, optional
+            Optional dictionary to update default configuration.
+
+        Returns:
+        --------
+        ConfigDict
+            Default configuration for PAG.
+        """
+        config = ConfigDict()
+        config.discount = 0.99
+        config.reward_scale = 1.0
+        config.alpha_multiplier = 1.0
+        config.use_automatic_entropy_tuning = True
+        config.use_automatic_entropy_tuning_parametrized_perturbation = True
+        config.backup_entropy = True
+        config.target_entropy = 0.0
+        config.policy_lr = 3e-4
+        config.qf_lr = 3e-4
+        config.optimizer_type = 'adam'
+        config.soft_target_update_rate = 5e-3
+        config.target_update_period = 1
+
+        if updates is not None:
+            config.update(ConfigDict(updates).copy_and_resolve_references())
+        return config
+
+    def __init__(self,
+                 config: Dict,
+                 policy: torch.nn.Module,
+                 sampler_policy: SamplerPolicy,
+                 qf1: torch.nn.Module,
+                 qf2: torch.nn.Module,
+                 target_qf1: torch.nn.Module,
+                 target_qf2: torch.nn.Module,
+                 use_local: float,
+                 local_expert: Any,
+                 parametrized_perturbation: torch.nn.Module,
+                 sampler_parametrized_perturbation: ExpertSamplerPolicy):
+        self.config = PAG.get_default_config(config)
+        self.policy = policy
+        self.sampler_policy = sampler_policy
+        self.qf1 = qf1
+        self.qf2 = qf2
+        self.target_qf1 = target_qf1
+        self.target_qf2 = target_qf2
+        self.parametrized_perturbation = parametrized_perturbation
+        self.sampler_parametrized_perturbation = sampler_parametrized_perturbation
+
+        # hyperparams
+        self.use_local = use_local
+        self.beta = 1.
+        self.local_expert = local_expert
+
+        optimizer_class = {
+            'adam': optim.Adam,
+            'sgd': optim.SGD,
+        }[self.config.optimizer_type]
+
+        self.policy_optimizer = optimizer_class(
+            self.policy.parameters(), self.config.policy_lr,
+        )
+        self.qf_optimizer = optimizer_class(
+            list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr
+        )
+        self.parametrized_perturbation_optimizer = optimizer_class(
+            self.parametrized_perturbation.parameters(), self.config.policy_lr,
+        )
+
+        if self.config.use_automatic_entropy_tuning:
+            self.log_alpha = Scalar(0.0)
+            self.alpha_optimizer = optimizer_class(
+                self.log_alpha.parameters(),
+                lr=self.config.policy_lr,
+            )
+        else:
+            self.log_alpha = None
+
+        if self.config.use_automatic_entropy_tuning_parametrized_perturbation:
+            self.expert_log_alpha = Scalar(0.0)
+            self.expert_alpha_optimizer = optimizer_class(
+                self.expert_log_alpha.parameters(),
+                lr=self.config.policy_lr,
+            )
+        else:
+            self.expert_log_alpha = None
+
+        self.update_target_network(1.0)
+        self._total_steps = 0
+
+    def update_target_network(self, soft_target_update_rate: float) -> None:
+        """
+        Update the target networks with soft target updates.
+
+        Parameters:
+        -----------
+        soft_target_update_rate: float
+            Rate of soft target network updates.
+ """ + soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) + soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) + + def train(self, batch: Dict[str, Any], batch_success: Optional[Dict[str, torch.Tensor]] = None) -> Dict[ + str, Any]: + """ + Train the PAG agent on a batch of experiences. + + Parameters: + ----------- + batch: dict + A dictionary containing the the transitions. + batch_success: dict, optional + A dictionary containing the the transitions. + + Returns: + -------- + dict + A dictionary containing training metrics. + """ + self._total_steps += 1 + + # classic obs + observations = batch['observations'] + actions = batch['actions'] + rewards = batch['rewards'] + next_observations = batch['next_observations'] + dones = batch['dones'] + + # retrieve local experts information + lambda_s_current = batch['use_local_current'] + lambda_s_next = batch['use_local_next'] + expert_actions = batch['expert_actions'] + next_expert_actions = batch['next_expert_actions'] + + new_actions, log_pi = self.policy(observations) + + if self.config.use_automatic_entropy_tuning: + alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() + alpha = self.log_alpha().exp() * self.config.alpha_multiplier + else: + alpha_loss = observations.new_tensor(0.0) + alpha = observations.new_tensor(self.config.alpha_multiplier) + + """ Policy loss """ + q_new_actions = torch.min( + self.qf1(observations, new_actions), + self.qf2(observations, new_actions), + ) + policy_loss = (alpha*log_pi - q_new_actions).mean() + + """ Parametrized noise loss""" + parametrized_perturbation_actions, expert_log_pi = self.parametrized_perturbation(observations, expert_actions) + + if self.config.use_automatic_entropy_tuning_parametrized_perturbation: + expert_alpha_loss = -(self.expert_log_alpha() * (expert_log_pi + self.config.target_entropy).detach()).mean() + expert_alpha = self.expert_log_alpha().exp() * self.config.expert_alpha_multiplier + else: + expert_alpha_loss = observations.new_tensor(0.0) + expert_alpha = observations.new_tensor(self.config.expert_alpha_multiplier) + + q_new_actions_perturbed = lambda_s_current * torch.min( + self.qf1(observations, parametrized_perturbation_actions), + self.qf2(observations, parametrized_perturbation_actions), + ) + parametrized_perturbation_loss = (expert_alpha * expert_log_pi - q_new_actions_perturbed).mean() + + """ Q function loss """ + q1_pred = self.qf1(observations, actions) + q2_pred = self.qf2(observations, actions) + + with torch.no_grad(): + new_next_actions, next_log_pi = self.policy(next_observations) + + next_log_pi = (1 - lambda_s_next) * next_log_pi + + expert_target_q_values = torch.min( + self.target_qf1(next_observations, next_expert_actions), + self.target_qf2(next_observations, next_expert_actions), + ) + classic_target_q_values = torch.min( + self.target_qf1(next_observations, new_next_actions), + self.target_qf2(next_observations, new_next_actions), + ) + target_q_values = lambda_s_next * expert_target_q_values + \ + (1 - lambda_s_next) * classic_target_q_values + + if self.config.backup_entropy: + target_q_values = target_q_values - alpha * next_log_pi + + q_target = self.config.reward_scale * rewards + (1. 
- dones) * self.config.discount * target_q_values + qf1_loss = F.mse_loss(q1_pred, q_target.detach()) + qf2_loss = F.mse_loss(q2_pred, q_target.detach()) + qf_loss = qf1_loss + qf2_loss + + if self.config.use_automatic_entropy_tuning: + self.alpha_optimizer.zero_grad() + alpha_loss.backward() + self.alpha_optimizer.step() + + if self.config.use_automatic_entropy_tuning_parametrized_perturbation: + self.expert_alpha_optimizer.zero_grad() + expert_alpha_loss.backward() + self.expert_alpha_optimizer.step() + + self.policy_optimizer.zero_grad() + policy_loss.backward() + self.policy_optimizer.step() + + self.parametrized_perturbation_optimizer.zero_grad() + parametrized_perturbation_loss.backward() + self.parametrized_perturbation_optimizer.step() + + self.qf_optimizer.zero_grad() + qf_loss.backward() + self.qf_optimizer.step() + + if self.total_steps % self.config.target_update_period == 0: + self.update_target_network( + self.config.soft_target_update_rate + ) + + metrics_to_return = dict( + log_pi=log_pi.mean().item(), + policy_loss=policy_loss.item(), + parametrized_perturbation_loss=parametrized_perturbation_loss.item(), + qf1_loss=qf1_loss.item(), + qf2_loss=qf2_loss.item(), + alpha_loss=alpha_loss.item(), + alpha=alpha.item(), + expert_alpha_loss=expert_alpha_loss.item(), + expert_alpha=expert_alpha.item(), + average_qf1=q1_pred.mean().item(), + average_qf2=q2_pred.mean().item(), + average_target_q=target_q_values.mean().item(), + total_steps=self.total_steps, + ) + + return metrics_to_return + + def torch_to_device(self, device: torch.device) -> None: + """ + Move all modules to the specified device. + + Parameters: + ----------- + device: torch.device + The target device. + """ + for module in self.modules: + module.to(device) + + def get_action(self, + env: Any, + observation: np.ndarray, + deterministic: bool = False, + add_local_information: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, float, np.ndarray]]: + """ + Get an action from the policy. + + Parameters: + ----------- + env: Any + The environment. + observation: np.ndarray + The current observation. + deterministic: bool, optional + Whether to sample a deterministic action. + add_local_information: bool, optional + Whether to add local information. + + Returns: + -------- + Tuple[np.ndarray, float, np.ndarray] + The action, local information, and expert action. + """ + + action = self.sampler_policy( + np.expand_dims(observation, 0), deterministic=deterministic + )[0, :] + if add_local_information: + use_local = self.use_local.get_use_local(env, + observation) + expert_action_init = self.local_expert.get_action(observation, + init_action=action, + env=env) + expert_action = self.sampler_parametrized_perturbation( + np.expand_dims(observation, 0), np.expand_dims(expert_action_init, 0), + beta=self.beta, deterministic=deterministic + )[0, :] + if use_local: + return expert_action, use_local, expert_action + return action, use_local, expert_action + return action + + @property + def modules(self) -> List[torch.nn.Module]: + """ + Get a list of modules. + + Returns: + -------- + List[nn.Module] + The list of modules including policy, q-functions, and optional log_alpha. + """ + modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] + if self.config.use_automatic_entropy_tuning: + modules.append(self.log_alpha) + return modules + + @property + def total_steps(self) -> int: + """ + Get the total number of steps taken. + + Returns: + -------- + int + The total number of steps. 
+ """ + return self._total_steps diff --git a/RLLG/agents/algos/pig.py b/RLLG/agents/algos/pig.py index 9437f962..073f6300 100644 --- a/RLLG/agents/algos/pig.py +++ b/RLLG/agents/algos/pig.py @@ -1,231 +1,344 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - -from ml_collections import ConfigDict -import numpy as np -import torch -import torch.optim as optim -import torch.nn.functional as F - -from agents.common.model import Scalar, soft_target_update - - -class PIG(object): - - @staticmethod - def get_default_config(updates=None): - config = ConfigDict() - config.discount = 0.99 - config.reward_scale = 1.0 - config.alpha_multiplier = 1.0 - config.use_automatic_entropy_tuning = True - config.backup_entropy = True - config.target_entropy = 0.0 - config.policy_lr = 3e-4 - config.qf_lr = 3e-4 - config.optimizer_type = 'adam' - config.soft_target_update_rate = 5e-3 - config.target_update_period = 1 - config.use_automatic_beta_tuning = False - config.target_beta = 0.0 - - if updates is not None: - config.update(ConfigDict(updates).copy_and_resolve_references()) - return config - - def __init__(self, config, policy, sampler_policy, qf1, qf2, target_qf1, target_qf2, - use_local, local_expert, beta): - self.config = PIG.get_default_config(config) - self.policy = policy - self.sampler_policy = sampler_policy - self.qf1 = qf1 - self.qf2 = qf2 - self.target_qf1 = target_qf1 - self.target_qf2 = target_qf2 - - # hyper parameter - self.use_local = use_local - self.local_expert = local_expert - self.beta = beta - self.training = True - - optimizer_class = { - 'adam': optim.Adam, - 'sgd': optim.SGD, - }[self.config.optimizer_type] - - self.policy_optimizer = optimizer_class( - self.policy.parameters(), self.config.policy_lr, - ) - self.qf_optimizer = optimizer_class( - list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr - ) - - if self.config.use_automatic_entropy_tuning: - self.log_alpha = Scalar(0.0) - self.alpha_optimizer = optimizer_class( - self.log_alpha.parameters(), - lr=self.config.policy_lr, - ) - else: - self.log_alpha = None - - if self.config.use_automatic_beta_tuning: - self.log_beta = Scalar(0.0) - self.beta_optimizer = optimizer_class( - self.log_beta.parameters(), - lr=self.config.policy_lr, - ) - else: - self.log_beta = None - - self.update_target_network(1.0) - self._total_steps = 0 - - def update_target_network(self, soft_target_update_rate): - soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) - soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) - - def train(self, batch, batch_success=None): - self._total_steps += 1 - - observations = batch['observations'] - actions = batch['actions'] - rewards = batch['rewards'] - next_observations = batch['next_observations'] - dones = batch['dones'] - - # retrieve local experts information - lambda_s_current = batch['use_local_current'] - # lambda_s_next = batch['use_local_next'] # no need - expert_actions = batch['expert_actions'] - # next_expert_actions = batch['next_expert_actions'] # no need - - new_actions, log_pi = self.policy(observations) - - if self.config.use_automatic_entropy_tuning: - alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() - alpha = self.log_alpha().exp() * self.config.alpha_multiplier - else: - alpha_loss = 
observations.new_tensor(0.0) - alpha = observations.new_tensor(self.config.alpha_multiplier) - - # It may be possible to tune beta according to a specific loss - if self.config.use_automatic_beta_tuning: - beta_loss = 1 - beta = self.log_beta().exp() - else: - beta_loss = observations.new_tensor(0.0) - beta = observations.new_tensor(self.beta) - - """ Policy loss """ - q_new_actions = torch.min( - self.qf1(observations, new_actions), - self.qf2(observations, new_actions), - ) - policy_loss = (alpha*log_pi - q_new_actions).mean() - - # PolicyGuided learning: loss is minus likelihood of expert action - guided_loss = beta * -(lambda_s_current * self.policy.log_prob(observations, expert_actions)).mean() - - policy_loss = policy_loss + guided_loss - - """ Q function loss """ - q1_pred = self.qf1(observations, actions) - q2_pred = self.qf2(observations, actions) - - with torch.no_grad(): - new_next_actions, next_log_pi = self.policy(next_observations) - - target_q_values = torch.min( - self.target_qf1(next_observations, new_next_actions), - self.target_qf2(next_observations, new_next_actions), - ) - - if self.config.backup_entropy: - target_q_values = target_q_values - alpha * next_log_pi - - q_target = self.config.reward_scale * rewards + (1. - dones) * self.config.discount * target_q_values - qf1_loss = F.mse_loss(q1_pred, q_target.detach()) - qf2_loss = F.mse_loss(q2_pred, q_target.detach()) - qf_loss = qf1_loss + qf2_loss - - if self.config.use_automatic_entropy_tuning: - self.alpha_optimizer.zero_grad() - alpha_loss.backward() - self.alpha_optimizer.step() - - self.policy_optimizer.zero_grad() - policy_loss.backward() - self.policy_optimizer.step() - - self.qf_optimizer.zero_grad() - qf_loss.backward() - self.qf_optimizer.step() - - if self.total_steps % self.config.target_update_period == 0: - self.update_target_network( - self.config.soft_target_update_rate - ) - - metrics_to_return = dict( - log_pi=log_pi.mean().item(), - policy_loss=policy_loss.item(), - guided_policy_loss=guided_loss.item(), - initial_policy_loss=policy_loss.item()-guided_loss.item(), - qf1_loss=qf1_loss.item(), - qf2_loss=qf2_loss.item(), - alpha_loss=alpha_loss.item(), - alpha=alpha.item(), - beta_loss=beta_loss.item(), - beta=beta.item(), - average_qf1=q1_pred.mean().item(), - average_qf2=q2_pred.mean().item(), - average_target_q=target_q_values.mean().item(), - total_steps=self.total_steps, - ) - - return metrics_to_return - - def torch_to_device(self, device): - for module in self.modules: - module.to(device) - - def get_action(self, - env, - observation, - deterministic=False, - add_local_information=False): - """ - The PolicyGuidedAgent always picks the agent generated by the policy. - """ - action = self.sampler_policy( - np.expand_dims(observation, 0), deterministic=deterministic - )[0, :] - if add_local_information: - use_local = self.use_local.get_use_local(env, - observation) - expert_action = self.local_expert.get_action(observation, - init_action=action, - env=env) - return action, use_local, expert_action - return action - - @property - def modules(self): - modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] - if self.config.use_automatic_entropy_tuning: - modules.append(self.log_alpha) - return modules - - @property - def total_steps(self): - return self._total_steps +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. 
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Optional, Tuple, Any, Dict, List, Union
+from ml_collections import ConfigDict
+import numpy as np
+import torch
+import torch.optim as optim
+import torch.nn.functional as F
+
+from agents.common.model import Scalar, soft_target_update, SamplerPolicy
+
+
+class PIG(object):
+    """
+    Policy Improvement Guided (PIG) algorithm implementation.
+
+    Parameters:
+    -----------
+    config: dict
+        Configuration parameters for PIG.
+    policy: torch.nn.Module
+        The policy network.
+    sampler_policy: SamplerPolicy
+        The sampler policy network.
+    qf1: torch.nn.Module
+        The first critic network.
+    qf2: torch.nn.Module
+        The second critic network.
+    target_qf1: torch.nn.Module
+        The target network for the first critic.
+    target_qf2: torch.nn.Module
+        The target network for the second critic.
+    use_local: float
+        Float confidence function indicating whether to trust local policies.
+    local_expert: Any
+        Local expert.
+    beta: float
+        Strength of the regularization.
+    """
+
+    @staticmethod
+    def get_default_config(updates: Optional[Dict] = None) -> ConfigDict:
+        """
+        Get the default configuration for PIG.
+
+        Parameters:
+        -----------
+        updates: dict, optional
+            Optional dictionary to update default configuration.
+
+        Returns:
+        --------
+        ConfigDict
+            Default configuration for PIG.
+        """
+        config = ConfigDict()
+        config.discount = 0.99
+        config.reward_scale = 1.0
+        config.alpha_multiplier = 1.0
+        config.use_automatic_entropy_tuning = True
+        config.backup_entropy = True
+        config.target_entropy = 0.0
+        config.policy_lr = 3e-4
+        config.qf_lr = 3e-4
+        config.optimizer_type = 'adam'
+        config.soft_target_update_rate = 5e-3
+        config.target_update_period = 1
+        config.use_automatic_beta_tuning = False
+        config.target_beta = 0.0
+
+        if updates is not None:
+            config.update(ConfigDict(updates).copy_and_resolve_references())
+        return config
+
+    def __init__(self,
+                 config: Dict,
+                 policy: torch.nn.Module,
+                 sampler_policy: SamplerPolicy,
+                 qf1: torch.nn.Module,
+                 qf2: torch.nn.Module,
+                 target_qf1: torch.nn.Module,
+                 target_qf2: torch.nn.Module,
+                 use_local: float,
+                 local_expert: Any,
+                 beta: float):
+        self.config = PIG.get_default_config(config)
+        self.policy = policy
+        self.sampler_policy = sampler_policy
+        self.qf1 = qf1
+        self.qf2 = qf2
+        self.target_qf1 = target_qf1
+        self.target_qf2 = target_qf2
+
+        # hyper parameter
+        self.use_local = use_local
+        self.local_expert = local_expert
+        self.beta = beta
+        self.training = True
+
+        optimizer_class = {
+            'adam': optim.Adam,
+            'sgd': optim.SGD,
+        }[self.config.optimizer_type]
+
+        self.policy_optimizer = optimizer_class(
+            self.policy.parameters(), self.config.policy_lr,
+        )
+        self.qf_optimizer = optimizer_class(
+            list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr
+        )
+
+        if self.config.use_automatic_entropy_tuning:
+            self.log_alpha = Scalar(0.0)
+            self.alpha_optimizer = optimizer_class(
+                self.log_alpha.parameters(),
+                lr=self.config.policy_lr,
+            )
+        else:
+            self.log_alpha = None
+
+        if self.config.use_automatic_beta_tuning:
+            self.log_beta = Scalar(0.0)
+            self.beta_optimizer = optimizer_class(
+                self.log_beta.parameters(),
+                lr=self.config.policy_lr,
+            )
+        else:
+            self.log_beta = None
+
+        self.update_target_network(1.0)
+        self._total_steps = 0
+
+    def update_target_network(self, soft_target_update_rate: float) -> None:
+        """
+        Update the target networks with soft target updates.
+ + Parameters: + ----------- + soft_target_update_rate: float + Rate of soft target network updates. + """ + soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) + soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) + + def train(self, batch: Dict[str, Any], batch_success: Optional[Dict[str, torch.Tensor]] = None) -> Dict[ + str, Any]: + """ + Train the PIG agent on a batch of experiences. + + Parameters: + ----------- + batch: dict + A dictionary containing the the transitions. + batch_success: dict, optional + A dictionary containing the the transitions. + + Returns: + -------- + dict + A dictionary containing training metrics. + """ + self._total_steps += 1 + + observations = batch['observations'] + actions = batch['actions'] + rewards = batch['rewards'] + next_observations = batch['next_observations'] + dones = batch['dones'] + + # retrieve local experts information + lambda_s_current = batch['use_local_current'] + # lambda_s_next = batch['use_local_next'] # no need + expert_actions = batch['expert_actions'] + # next_expert_actions = batch['next_expert_actions'] # no need + + new_actions, log_pi = self.policy(observations) + + if self.config.use_automatic_entropy_tuning: + alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() + alpha = self.log_alpha().exp() * self.config.alpha_multiplier + else: + alpha_loss = observations.new_tensor(0.0) + alpha = observations.new_tensor(self.config.alpha_multiplier) + + # It may be possible to tune beta according to a specific loss + if self.config.use_automatic_beta_tuning: + beta_loss = 1 + beta = self.log_beta().exp() + else: + beta_loss = observations.new_tensor(0.0) + beta = observations.new_tensor(self.beta) + + """ Policy loss """ + q_new_actions = torch.min( + self.qf1(observations, new_actions), + self.qf2(observations, new_actions), + ) + policy_loss = (alpha*log_pi - q_new_actions).mean() + + # PolicyGuided learning: loss is minus likelihood of expert action + guided_loss = beta * -(lambda_s_current * self.policy.log_prob(observations, expert_actions)).mean() + + policy_loss = policy_loss + guided_loss + + """ Q function loss """ + q1_pred = self.qf1(observations, actions) + q2_pred = self.qf2(observations, actions) + + with torch.no_grad(): + new_next_actions, next_log_pi = self.policy(next_observations) + + target_q_values = torch.min( + self.target_qf1(next_observations, new_next_actions), + self.target_qf2(next_observations, new_next_actions), + ) + + if self.config.backup_entropy: + target_q_values = target_q_values - alpha * next_log_pi + + q_target = self.config.reward_scale * rewards + (1. 
- dones) * self.config.discount * target_q_values + qf1_loss = F.mse_loss(q1_pred, q_target.detach()) + qf2_loss = F.mse_loss(q2_pred, q_target.detach()) + qf_loss = qf1_loss + qf2_loss + + if self.config.use_automatic_entropy_tuning: + self.alpha_optimizer.zero_grad() + alpha_loss.backward() + self.alpha_optimizer.step() + + self.policy_optimizer.zero_grad() + policy_loss.backward() + self.policy_optimizer.step() + + self.qf_optimizer.zero_grad() + qf_loss.backward() + self.qf_optimizer.step() + + if self.total_steps % self.config.target_update_period == 0: + self.update_target_network( + self.config.soft_target_update_rate + ) + + metrics_to_return = dict( + log_pi=log_pi.mean().item(), + policy_loss=policy_loss.item(), + guided_policy_loss=guided_loss.item(), + initial_policy_loss=policy_loss.item()-guided_loss.item(), + qf1_loss=qf1_loss.item(), + qf2_loss=qf2_loss.item(), + alpha_loss=alpha_loss.item(), + alpha=alpha.item(), + beta_loss=beta_loss.item(), + beta=beta.item(), + average_qf1=q1_pred.mean().item(), + average_qf2=q2_pred.mean().item(), + average_target_q=target_q_values.mean().item(), + total_steps=self.total_steps, + ) + + return metrics_to_return + + def torch_to_device(self, device: torch.device) -> None: + """ + Move all modules to the specified device. + + Parameters: + ----------- + device: torch.device + The target device. + """ + for module in self.modules: + module.to(device) + + def get_action(self, + env: Any, + observation: np.ndarray, + deterministic: bool = False, + add_local_information: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, float, np.ndarray]]: + """ + Get an action from the policy. + + Parameters: + ----------- + env: Any + The environment. + observation: np.ndarray + The current observation. + deterministic: bool, optional + Whether to sample a deterministic action. + add_local_information: bool, optional + Whether to add local information. + + Returns: + -------- + Tuple[np.ndarray, float, np.ndarray] + The action, local information, and expert action. + """ + action = self.sampler_policy( + np.expand_dims(observation, 0), deterministic=deterministic + )[0, :] + if add_local_information: + use_local = self.use_local.get_use_local(env, + observation) + expert_action = self.local_expert.get_action(observation, + init_action=action, + env=env) + return action, use_local, expert_action + return action + + @property + def modules(self) -> List[torch.nn.Module]: + """ + Get a list of modules. + + Returns: + -------- + List[nn.Module] + The list of modules including policy, q-functions, and optional log_alpha. + """ + modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] + if self.config.use_automatic_entropy_tuning: + modules.append(self.log_alpha) + return modules + + @property + def total_steps(self) -> int: + """ + Get the total number of steps taken. + + Returns: + -------- + int + The total number of steps. + """ + return self._total_steps diff --git a/RLLG/agents/algos/sac.py b/RLLG/agents/algos/sac.py index 3aa94d11..393c45aa 100644 --- a/RLLG/agents/algos/sac.py +++ b/RLLG/agents/algos/sac.py @@ -1,184 +1,289 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
- - -from ml_collections import ConfigDict -import numpy as np -import torch -import torch.optim as optim -import torch.nn.functional as F - -from agents.common.model import Scalar, soft_target_update - - -class SAC(object): - - @staticmethod - def get_default_config(updates=None): - config = ConfigDict() - config.discount = 0.99 - config.reward_scale = 1.0 - config.alpha_multiplier = 1.0 - config.use_automatic_entropy_tuning = True - config.backup_entropy = True - config.target_entropy = 0.0 - config.policy_lr = 3e-4 - config.qf_lr = 3e-4 - config.optimizer_type = 'adam' - config.soft_target_update_rate = 5e-3 - config.target_update_period = 1 - - if updates is not None: - config.update(ConfigDict(updates).copy_and_resolve_references()) - return config - - def __init__(self, config, policy, sampler_policy, qf1, qf2, target_qf1, target_qf2): - self.config = SAC.get_default_config(config) - self.policy = policy - self.sampler_policy = sampler_policy - self.qf1 = qf1 - self.qf2 = qf2 - self.target_qf1 = target_qf1 - self.target_qf2 = target_qf2 - - optimizer_class = { - 'adam': optim.Adam, - 'sgd': optim.SGD, - }[self.config.optimizer_type] - - self.policy_optimizer = optimizer_class( - self.policy.parameters(), self.config.policy_lr, - ) - self.qf_optimizer = optimizer_class( - list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr - ) - - if self.config.use_automatic_entropy_tuning: - self.log_alpha = Scalar(0.0) - self.alpha_optimizer = optimizer_class( - self.log_alpha.parameters(), - lr=self.config.policy_lr, - ) - else: - self.log_alpha = None - - self.update_target_network(1.0) - self._total_steps = 0 - - def update_target_network(self, soft_target_update_rate): - soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) - soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) - - def train(self, batch, batch_success=None): - self._total_steps += 1 - - # classic obs - observations = batch['observations'] - actions = batch['actions'] - rewards = batch['rewards'] - next_observations = batch['next_observations'] - dones = batch['dones'] - - new_actions, log_pi = self.policy(observations) - - if self.config.use_automatic_entropy_tuning: - alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() - alpha = self.log_alpha().exp() * self.config.alpha_multiplier - else: - alpha_loss = observations.new_tensor(0.0) - alpha = observations.new_tensor(self.config.alpha_multiplier) - - """ Policy loss """ - q_new_actions = torch.min( - self.qf1(observations, new_actions), - self.qf2(observations, new_actions), - ) - policy_loss = (alpha*log_pi - q_new_actions).mean() - - """ Q function loss """ - q1_pred = self.qf1(observations, actions) - q2_pred = self.qf2(observations, actions) - - with torch.no_grad(): - new_next_actions, next_log_pi = self.policy(next_observations) - - target_q_values = torch.min( - self.target_qf1(next_observations, new_next_actions), - self.target_qf2(next_observations, new_next_actions), - ) - - if self.config.backup_entropy: - target_q_values = target_q_values - alpha * next_log_pi - - q_target = self.config.reward_scale * rewards + (1. 
- dones) * self.config.discount * target_q_values - qf1_loss = F.mse_loss(q1_pred, q_target.detach()) - qf2_loss = F.mse_loss(q2_pred, q_target.detach()) - qf_loss = qf1_loss + qf2_loss - - if self.config.use_automatic_entropy_tuning: - self.alpha_optimizer.zero_grad() - alpha_loss.backward() - self.alpha_optimizer.step() - - self.policy_optimizer.zero_grad() - policy_loss.backward() - self.policy_optimizer.step() - - self.qf_optimizer.zero_grad() - qf_loss.backward() - self.qf_optimizer.step() - - if self.total_steps % self.config.target_update_period == 0: - self.update_target_network( - self.config.soft_target_update_rate - ) - - metrics_to_return = dict( - log_pi=log_pi.mean().item(), - policy_loss=policy_loss.item(), - qf1_loss=qf1_loss.item(), - qf2_loss=qf2_loss.item(), - alpha_loss=alpha_loss.item(), - alpha=alpha.item(), - average_qf1=q1_pred.mean().item(), - average_qf2=q2_pred.mean().item(), - average_target_q=target_q_values.mean().item(), - total_steps=self.total_steps, - ) - - return metrics_to_return - - - def torch_to_device(self, device): - for module in self.modules: - module.to(device) - - def get_action(self, - env, - observation, - deterministic=False, - add_local_information=False): - action = self.sampler_policy( - np.expand_dims(observation, 0), deterministic=deterministic - )[0, :] - if add_local_information: - return action, 0, np.zeros(action.shape) - return action - - @property - def modules(self): - modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] - if self.config.use_automatic_entropy_tuning: - modules.append(self.log_alpha) - return modules - - @property - def total_steps(self): - return self._total_steps +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Optional, Tuple, Any, Dict, List, Union +from ml_collections import ConfigDict +import numpy as np +import torch +import torch.optim as optim +import torch.nn.functional as F +from agents.common.model import Scalar, soft_target_update, SamplerPolicy + + +class SAC(object): + """ + Soft Actor-Critic (SAC) algorithm implementation. + + Parameters: + ----------- + config: dict + Configuration parameters for SAC. + policy: torch.nn.Module + The policy network. + sampler_policy: SamplerPolicy + The sampler policy network. + qf1: torch.nn.Module + The first critic network. + qf2: torch.nn.Module + The second critic network. + target_qf1: torch.nn.Module + The target network for the first critic. + target_qf2: torch.nn.Module + The target network for the second critic. + """ + + @staticmethod + def get_default_config(updates: Optional[Dict] = None) -> ConfigDict: + """ + Get the default configuration for SAC. + + Parameters: + ----------- + updates: dict, optional + Optional dictionary to update default configuration. + + Returns: + -------- + ConfigDict + Default configuration for SAC. 
+ """ + config = ConfigDict() + config.discount = 0.99 + config.reward_scale = 1.0 + config.alpha_multiplier = 1.0 + config.use_automatic_entropy_tuning = True + config.backup_entropy = True + config.target_entropy = 0.0 + config.policy_lr = 3e-4 + config.qf_lr = 3e-4 + config.optimizer_type = 'adam' + config.soft_target_update_rate = 5e-3 + config.target_update_period = 1 + + if updates is not None: + config.update(ConfigDict(updates).copy_and_resolve_references()) + return config + + def __init__(self, + config: Dict, + policy: torch.nn.Module, + sampler_policy: SamplerPolicy, + qf1: torch.nn.Module, + qf2: torch.nn.Module, + target_qf1: torch.nn.Module, + target_qf2: torch.nn.Module): + self.config = SAC.get_default_config(config) + self.policy = policy + self.sampler_policy = sampler_policy + self.qf1 = qf1 + self.qf2 = qf2 + self.target_qf1 = target_qf1 + self.target_qf2 = target_qf2 + + optimizer_class = { + 'adam': optim.Adam, + 'sgd': optim.SGD, + }[self.config.optimizer_type] + + self.policy_optimizer = optimizer_class( + self.policy.parameters(), self.config.policy_lr, + ) + self.qf_optimizer = optimizer_class( + list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr + ) + + if self.config.use_automatic_entropy_tuning: + self.log_alpha = Scalar(0.0) + self.alpha_optimizer = optimizer_class( + self.log_alpha.parameters(), + lr=self.config.policy_lr, + ) + else: + self.log_alpha = None + + self.update_target_network(1.0) + self._total_steps = 0 + + def update_target_network(self, soft_target_update_rate: float) -> None: + """ + Update the target networks with soft target updates. + + Parameters: + ----------- + soft_target_update_rate: float + Rate of soft target network updates. + """ + soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) + soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) + + def train(self, batch: Dict[str, Any], batch_success: Optional[Dict[str, torch.Tensor]] = None) -> Dict[ + str, Any]: + """ + Train the SAC (Soft Actor-Critic) agent on a batch of experiences. + + Parameters: + ----------- + batch: dict + A dictionary containing the the transitions. + batch_success: dict, optional + A dictionary containing the the transitions. + + Returns: + -------- + dict + A dictionary containing training metrics. 
+ """ + self._total_steps += 1 + + # classic obs + observations = batch['observations'] + actions = batch['actions'] + rewards = batch['rewards'] + next_observations = batch['next_observations'] + dones = batch['dones'] + + new_actions, log_pi = self.policy(observations) + + if self.config.use_automatic_entropy_tuning: + alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() + alpha = self.log_alpha().exp() * self.config.alpha_multiplier + else: + alpha_loss = observations.new_tensor(0.0) + alpha = observations.new_tensor(self.config.alpha_multiplier) + + """ Policy loss """ + q_new_actions = torch.min( + self.qf1(observations, new_actions), + self.qf2(observations, new_actions), + ) + policy_loss = (alpha*log_pi - q_new_actions).mean() + + """ Q function loss """ + q1_pred = self.qf1(observations, actions) + q2_pred = self.qf2(observations, actions) + + with torch.no_grad(): + new_next_actions, next_log_pi = self.policy(next_observations) + + target_q_values = torch.min( + self.target_qf1(next_observations, new_next_actions), + self.target_qf2(next_observations, new_next_actions), + ) + + if self.config.backup_entropy: + target_q_values = target_q_values - alpha * next_log_pi + + q_target = self.config.reward_scale * rewards + (1. - dones) * self.config.discount * target_q_values + qf1_loss = F.mse_loss(q1_pred, q_target.detach()) + qf2_loss = F.mse_loss(q2_pred, q_target.detach()) + qf_loss = qf1_loss + qf2_loss + + if self.config.use_automatic_entropy_tuning: + self.alpha_optimizer.zero_grad() + alpha_loss.backward() + self.alpha_optimizer.step() + + self.policy_optimizer.zero_grad() + policy_loss.backward() + self.policy_optimizer.step() + + self.qf_optimizer.zero_grad() + qf_loss.backward() + self.qf_optimizer.step() + + if self.total_steps % self.config.target_update_period == 0: + self.update_target_network( + self.config.soft_target_update_rate + ) + + metrics_to_return = dict( + log_pi=log_pi.mean().item(), + policy_loss=policy_loss.item(), + qf1_loss=qf1_loss.item(), + qf2_loss=qf2_loss.item(), + alpha_loss=alpha_loss.item(), + alpha=alpha.item(), + average_qf1=q1_pred.mean().item(), + average_qf2=q2_pred.mean().item(), + average_target_q=target_q_values.mean().item(), + total_steps=self.total_steps, + ) + + return metrics_to_return + + def torch_to_device(self, device: torch.device) -> None: + """ + Move all modules to the specified device. + + Parameters: + ----------- + device: torch.device + The target device. + """ + for module in self.modules: + module.to(device) + + def get_action(self, + env: Any, + observation: np.ndarray, + deterministic: bool = False, + add_local_information: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, float, np.ndarray]]: + """ + Get an action from the policy. + + Parameters: + ----------- + env: Any + The environment. + observation: np.ndarray + The current observation. + deterministic: bool, optional + Whether to sample a deterministic action. + add_local_information: bool, optional + Whether to add local information. + + Returns: + -------- + Tuple[np.ndarray, float, np.ndarray] + The action, local information, and expert action. + """ + action = self.sampler_policy( + np.expand_dims(observation, 0), deterministic=deterministic + )[0, :] + if add_local_information: + return action, 0, np.zeros(action.shape) + return action + + @property + def modules(self) -> List[torch.nn.Module]: + """ + Get a list of modules. 
+ + Returns: + -------- + List[nn.Module] + The list of modules including policy, q-functions, and optional log_alpha. + """ + modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] + if self.config.use_automatic_entropy_tuning: + modules.append(self.log_alpha) + return modules + + @property + def total_steps(self) -> int: + """ + Get the total number of steps taken. + + Returns: + -------- + int + The total number of steps. + """ + return self._total_steps diff --git a/RLLG/agents/algos/sag.py b/RLLG/agents/algos/sag.py index db74a526..68a3e112 100644 --- a/RLLG/agents/algos/sag.py +++ b/RLLG/agents/algos/sag.py @@ -1,226 +1,305 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - -from ml_collections import ConfigDict -import numpy as np -import torch -import torch.optim as optim -import torch.nn.functional as F - -from agents.common.model import Scalar, soft_target_update - - -class SAG(object): - - @staticmethod - def get_default_config(updates=None): - config = ConfigDict() - config.discount = 0.99 - config.reward_scale = 1.0 - config.alpha_multiplier = 1.0 - config.use_automatic_entropy_tuning = True - config.backup_entropy = True - config.target_entropy = 0.0 - config.policy_lr = 3e-4 - config.qf_lr = 3e-4 - config.optimizer_type = 'adam' - config.soft_target_update_rate = 5e-3 - config.target_update_period = 1 - - if updates is not None: - config.update(ConfigDict(updates).copy_and_resolve_references()) - return config - - def __init__(self, config, policy, sampler_policy, qf1, qf2, target_qf1, target_qf2, - use_local, local_expert): - self.config = SAG.get_default_config(config) - self.policy = policy - self.sampler_policy = sampler_policy - self.qf1 = qf1 - self.qf2 = qf2 - self.target_qf1 = target_qf1 - self.target_qf2 = target_qf2 - - # hyperparams - self.use_local = use_local - self.local_expert = local_expert - - optimizer_class = { - 'adam': optim.Adam, - 'sgd': optim.SGD, - }[self.config.optimizer_type] - - self.policy_optimizer = optimizer_class( - self.policy.parameters(), self.config.policy_lr, - ) - self.qf_optimizer = optimizer_class( - list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr - ) - - if self.config.use_automatic_entropy_tuning: - self.log_alpha = Scalar(0.0) - self.alpha_optimizer = optimizer_class( - self.log_alpha.parameters(), - lr=self.config.policy_lr, - ) - else: - self.log_alpha = None - - self.update_target_network(1.0) - self._total_steps = 0 - - def update_target_network(self, soft_target_update_rate): - soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) - soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) - - def train(self, batch, batch_success=None): - self._total_steps += 1 - - # classic obs - observations = batch['observations'] - actions = batch['actions'] - rewards = batch['rewards'] - next_observations = batch['next_observations'] - dones = batch['dones'] - - # retrieve local experts information - lambda_s_current = batch['use_local_current'] - lambda_s_next = batch['use_local_next'] - expert_actions = batch['expert_actions'] - next_expert_actions = batch['next_expert_actions'] - - new_actions, log_pi = self.policy(observations) - - if self.config.use_automatic_entropy_tuning: - alpha_loss = -(self.log_alpha() * (log_pi + 
self.config.target_entropy).detach()).mean() - alpha = self.log_alpha().exp() * self.config.alpha_multiplier - else: - alpha_loss = observations.new_tensor(0.0) - alpha = observations.new_tensor(self.config.alpha_multiplier) - - """ Policy loss """ - if self.qf1.return_last_layer: - q_new_actions = torch.min( - self.qf1(observations, new_actions)[0], - self.qf2(observations, new_actions)[0], - ) - else: - q_new_actions = torch.min( - self.qf1(observations, new_actions), - self.qf2(observations, new_actions), - ) - policy_loss = (alpha*log_pi - q_new_actions).mean() - - """ Q function loss """ - if self.qf1.return_last_layer: - q1_pred, features_q1 = self.qf1(observations, actions) - q2_pred, features_q2 = self.qf2(observations, actions) - else: - q1_pred = self.qf1(observations, actions) - q2_pred = self.qf2(observations, actions) - - with torch.no_grad(): - new_next_actions, next_log_pi = self.policy(next_observations) - - # get new next actions from local experts --> REMEMBER THE POLICY IS SWITCHED - # new_next_actions = (new_next_actions.T * (1 - lambda_s_next)).T + \ - # (next_expert_actions.T * lambda_s_next).T - next_log_pi = (1 - lambda_s_next) * next_log_pi - - expert_target_q_values = torch.min( - self.target_qf1(next_observations, next_expert_actions), - self.target_qf2(next_observations, next_expert_actions), - ) - classic_target_q_values = torch.min( - self.target_qf1(next_observations, new_next_actions), - self.target_qf2(next_observations, new_next_actions), - ) - target_q_values = lambda_s_next * expert_target_q_values + \ - (1 - lambda_s_next) * classic_target_q_values - - if self.config.backup_entropy: - target_q_values = target_q_values - alpha * next_log_pi - - q_target = self.config.reward_scale * rewards + (1. - dones) * self.config.discount * target_q_values - qf1_loss = F.mse_loss(q1_pred, q_target.detach()) - qf2_loss = F.mse_loss(q2_pred, q_target.detach()) - qf_loss = qf1_loss + qf2_loss - - if self.config.use_automatic_entropy_tuning: - self.alpha_optimizer.zero_grad() - alpha_loss.backward() - self.alpha_optimizer.step() - - self.policy_optimizer.zero_grad() - policy_loss.backward() - self.policy_optimizer.step() - - self.qf_optimizer.zero_grad() - qf_loss.backward() - self.qf_optimizer.step() - - if self.total_steps % self.config.target_update_period == 0: - self.update_target_network( - self.config.soft_target_update_rate - ) - - metrics_to_return = dict( - log_pi=log_pi.mean().item(), - policy_loss=policy_loss.item(), - qf1_loss=qf1_loss.item(), - qf2_loss=qf2_loss.item(), - alpha_loss=alpha_loss.item(), - alpha=alpha.item(), - average_qf1=q1_pred.mean().item(), - average_qf2=q2_pred.mean().item(), - average_target_q=target_q_values.mean().item(), - total_steps=self.total_steps, - ) - - return metrics_to_return - - def torch_to_device(self, device): - for module in self.modules: - module.to(device) - - def get_action(self, - env, - observation, - deterministic=False, - add_local_information=False): - """ - In switched agent, the agent always picks the expert action if it is relevant. 
- """ - - action = self.sampler_policy( - np.expand_dims(observation, 0), deterministic=deterministic - )[0, :] - if add_local_information: - use_local = self.use_local.get_use_local(env, - observation) - expert_action = self.local_expert.get_action(observation, - init_action=action, - env=env) - if use_local: - return expert_action, use_local, expert_action - return action, use_local, expert_action - return action - - @property - def modules(self): - modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] - if self.config.use_automatic_entropy_tuning: - modules.append(self.log_alpha) - return modules - - @property - def total_steps(self): - return self._total_steps +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Optional, Tuple, Any, Dict, List, Union +from ml_collections import ConfigDict +import numpy as np +import torch +import torch.optim as optim +import torch.nn.functional as F + +from agents.common.model import Scalar, soft_target_update + + +class SAG(object): + + @staticmethod + def get_default_config(updates: Optional[Dict] = None) -> ConfigDict: + """ + Get the default configuration for SAG. + + Parameters: + ----------- + updates: dict, optional + Optional dictionary to update default configuration. + + Returns: + -------- + ConfigDict + Default configuration for SAC. + """ + config = ConfigDict() + config.discount = 0.99 + config.reward_scale = 1.0 + config.alpha_multiplier = 1.0 + config.use_automatic_entropy_tuning = True + config.backup_entropy = True + config.target_entropy = 0.0 + config.policy_lr = 3e-4 + config.qf_lr = 3e-4 + config.optimizer_type = 'adam' + config.soft_target_update_rate = 5e-3 + config.target_update_period = 1 + + if updates is not None: + config.update(ConfigDict(updates).copy_and_resolve_references()) + return config + + def __init__(self, config, policy, sampler_policy, qf1, qf2, target_qf1, target_qf2, + use_local, local_expert): + self.config = SAG.get_default_config(config) + self.policy = policy + self.sampler_policy = sampler_policy + self.qf1 = qf1 + self.qf2 = qf2 + self.target_qf1 = target_qf1 + self.target_qf2 = target_qf2 + + # hyperparams + self.use_local = use_local + self.local_expert = local_expert + + optimizer_class = { + 'adam': optim.Adam, + 'sgd': optim.SGD, + }[self.config.optimizer_type] + + self.policy_optimizer = optimizer_class( + self.policy.parameters(), self.config.policy_lr, + ) + self.qf_optimizer = optimizer_class( + list(self.qf1.parameters()) + list(self.qf2.parameters()), self.config.qf_lr + ) + + if self.config.use_automatic_entropy_tuning: + self.log_alpha = Scalar(0.0) + self.alpha_optimizer = optimizer_class( + self.log_alpha.parameters(), + lr=self.config.policy_lr, + ) + else: + self.log_alpha = None + + self.update_target_network(1.0) + self._total_steps = 0 + + def update_target_network(self, soft_target_update_rate: float) -> None: + """ + Update the target networks with soft target updates. + + Parameters: + ----------- + soft_target_update_rate: float + Rate of soft target network updates. 
+ """ + soft_target_update(self.qf1, self.target_qf1, soft_target_update_rate) + soft_target_update(self.qf2, self.target_qf2, soft_target_update_rate) + + def train(self, batch: Dict[str, Any], batch_success: Optional[Dict[str, torch.Tensor]] = None) -> Dict[ + str, Any]: + """ + Train the SAG agent on a batch of experiences. + + Parameters: + ----------- + batch: dict + A dictionary containing the the transitions. + batch_success: dict, optional + A dictionary containing the the transitions. + + Returns: + -------- + dict + A dictionary containing training metrics. + """ + self._total_steps += 1 + + # classic obs + observations = batch['observations'] + actions = batch['actions'] + rewards = batch['rewards'] + next_observations = batch['next_observations'] + dones = batch['dones'] + + # retrieve local experts information + lambda_s_current = batch['use_local_current'] + lambda_s_next = batch['use_local_next'] + expert_actions = batch['expert_actions'] + next_expert_actions = batch['next_expert_actions'] + + new_actions, log_pi = self.policy(observations) + + if self.config.use_automatic_entropy_tuning: + alpha_loss = -(self.log_alpha() * (log_pi + self.config.target_entropy).detach()).mean() + alpha = self.log_alpha().exp() * self.config.alpha_multiplier + else: + alpha_loss = observations.new_tensor(0.0) + alpha = observations.new_tensor(self.config.alpha_multiplier) + + """ Policy loss """ + if self.qf1.return_last_layer: + q_new_actions = torch.min( + self.qf1(observations, new_actions)[0], + self.qf2(observations, new_actions)[0], + ) + else: + q_new_actions = torch.min( + self.qf1(observations, new_actions), + self.qf2(observations, new_actions), + ) + policy_loss = (alpha*log_pi - q_new_actions).mean() + + """ Q function loss """ + if self.qf1.return_last_layer: + q1_pred, features_q1 = self.qf1(observations, actions) + q2_pred, features_q2 = self.qf2(observations, actions) + else: + q1_pred = self.qf1(observations, actions) + q2_pred = self.qf2(observations, actions) + + with torch.no_grad(): + new_next_actions, next_log_pi = self.policy(next_observations) + + # get new next actions from local experts --> REMEMBER THE POLICY IS SWITCHED + # new_next_actions = (new_next_actions.T * (1 - lambda_s_next)).T + \ + # (next_expert_actions.T * lambda_s_next).T + next_log_pi = (1 - lambda_s_next) * next_log_pi + + expert_target_q_values = torch.min( + self.target_qf1(next_observations, next_expert_actions), + self.target_qf2(next_observations, next_expert_actions), + ) + classic_target_q_values = torch.min( + self.target_qf1(next_observations, new_next_actions), + self.target_qf2(next_observations, new_next_actions), + ) + target_q_values = lambda_s_next * expert_target_q_values + \ + (1 - lambda_s_next) * classic_target_q_values + + if self.config.backup_entropy: + target_q_values = target_q_values - alpha * next_log_pi + + q_target = self.config.reward_scale * rewards + (1. 
- dones) * self.config.discount * target_q_values + qf1_loss = F.mse_loss(q1_pred, q_target.detach()) + qf2_loss = F.mse_loss(q2_pred, q_target.detach()) + qf_loss = qf1_loss + qf2_loss + + if self.config.use_automatic_entropy_tuning: + self.alpha_optimizer.zero_grad() + alpha_loss.backward() + self.alpha_optimizer.step() + + self.policy_optimizer.zero_grad() + policy_loss.backward() + self.policy_optimizer.step() + + self.qf_optimizer.zero_grad() + qf_loss.backward() + self.qf_optimizer.step() + + if self.total_steps % self.config.target_update_period == 0: + self.update_target_network( + self.config.soft_target_update_rate + ) + + metrics_to_return = dict( + log_pi=log_pi.mean().item(), + policy_loss=policy_loss.item(), + qf1_loss=qf1_loss.item(), + qf2_loss=qf2_loss.item(), + alpha_loss=alpha_loss.item(), + alpha=alpha.item(), + average_qf1=q1_pred.mean().item(), + average_qf2=q2_pred.mean().item(), + average_target_q=target_q_values.mean().item(), + total_steps=self.total_steps, + ) + + return metrics_to_return + + def torch_to_device(self, device: torch.device) -> None: + """ + Move all modules to the specified device. + + Parameters: + ----------- + device: torch.device + The target device. + """ + for module in self.modules: + module.to(device) + + def get_action(self, + env: Any, + observation: np.ndarray, + deterministic: bool = False, + add_local_information: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, float, np.ndarray]]: + """ + Get an action from the policy. + + Parameters: + ----------- + env: Any + The environment. + observation: np.ndarray + The current observation. + deterministic: bool, optional + Whether to sample a deterministic action. + add_local_information: bool, optional + Whether to add local information. + + Returns: + -------- + Tuple[np.ndarray, float, np.ndarray] + The action, local information, and expert action. + """ + + action = self.sampler_policy( + np.expand_dims(observation, 0), deterministic=deterministic + )[0, :] + if add_local_information: + use_local = self.use_local.get_use_local(env, + observation) + expert_action = self.local_expert.get_action(observation, + init_action=action, + env=env) + if use_local: + return expert_action, use_local, expert_action + return action, use_local, expert_action + return action + + @property + def modules(self) -> List[torch.nn.Module]: + """ + Get a list of modules. + + Returns: + -------- + List[nn.Module] + The list of modules including policy, q-functions, and optional log_alpha. + """ + modules = [self.policy, self.qf1, self.qf2, self.target_qf1, self.target_qf2] + if self.config.use_automatic_entropy_tuning: + modules.append(self.log_alpha) + return modules + + @property + def total_steps(self) -> int: + """ + Get the total number of steps taken. + + Returns: + -------- + int + The total number of steps. + """ + return self._total_steps + diff --git a/RLLG/agents/common/config.py b/RLLG/agents/common/config.py new file mode 100644 index 00000000..ab5b6ccb --- /dev/null +++ b/RLLG/agents/common/config.py @@ -0,0 +1,67 @@ +from typing import Any, Dict, List, Optional, Tuple + + +def process_glob_config(config: Dict[str, Any]) \ + -> Tuple[List[str], Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any], List[float]]: + """ + Process a global configuration dictionary and extract relevant information. + + Parameters: + ---------- + config : Dict[str, Any] + The global configuration dictionary. 
+
+ Returns:
+ ----------
+ Tuple[List[str], Optional[Dict[str, Any]], Dict[str, Any], Dict[str, Any], Dict[str, Any], List[float]]
+ A tuple containing the extracted information:
+ - List of expert names.
+ - Dictionary for position tolerance (or None if not present).
+ - Dictionary for values for beta depending on the agent.
+ - Dictionary for values for delta depending on the agent.
+ - Dictionary for values for phi depending on the agent.
+ - List of decay parameters.
+ """
+ expert_names = config['local_experts']
+ del config['local_experts']
+ dict_pos_tol = None
+ if 'pos_tol' in config:
+ dict_pos_tol = config['pos_tol']
+ del config['pos_tol']
+ dict_beta = config['beta']
+ dict_delta = config['delta']
+ dict_phi = config['phi']
+ del config['beta']
+ del config['delta']
+ del config['phi']
+ decay_parameter_list = config['decay_parameter']
+ del config['decay_parameter']
+ return expert_names, dict_pos_tol, dict_beta, dict_delta, dict_phi, decay_parameter_list
+
+
+def process_config_per_agent(config: Dict[str, Any],
+ agent_name: str,
+ dict_beta: Dict[str, Any],
+ dict_delta: Dict[str, Any],
+ dict_phi: Dict[str, Any],
+ dict_pos_tol: Dict[str, Any]) -> None:
+ """
+ Process the configuration dictionary to make it dependent on the agent.
+
+ Parameters:
+ ----------
+ config : Dict[str, Any]
+ The global configuration dictionary, updated in place for the given agent.
+ dict_pos_tol : Optional[Dict[str, Any]]
+ Adds the pos_tol argument to the config dictionary; it is only useful for the safe cartpole environment.
+
+ Returns:
+ ----------
+ None
+ The function does not return anything.
+ """
+ if dict_pos_tol is not None:
+ config['pos_tol'] = dict_pos_tol[agent_name]
+ config['beta'] = dict_beta[agent_name]
+ config['delta'] = dict_delta[agent_name]
+ config['phi'] = dict_phi[agent_name]
diff --git a/RLLG/agents/common/creation_utils.py b/RLLG/agents/common/creation_utils.py
new file mode 100644
index 00000000..0e2af919
--- /dev/null
+++ b/RLLG/agents/common/creation_utils.py
@@ -0,0 +1,150 @@
+from typing import Type, Any, Dict, List, Optional, Tuple
+from envs.creation import get_env_and_control
+from agents.algos.sac import SAC
+from agents.algos.sag import SAG
+from agents.algos.pag import PAG
+from agents.algos.pig import PIG
+from agents.common.model import SamplerPolicy, ExpertSamplerPolicy
+from envs.cartpole.confidence import LambdaS
+import gym
+import torch
+
+
+dict_agents = {
+ 'SAC': SAC,
+ 'SAG': SAG,
+ 'PIG': PIG,
+ 'PAG': PAG,
+}
+
+
+def create_envs(cfg: Dict[str, Any]) -> Tuple[Type[gym.Env], Dict[str, Any], Type[gym.Env], Dict[str, Any]]:
+ """
+ Create training and testing environments with associated local control dictionaries based on the provided configuration.
+
+ Parameters:
+ ----------
+ cfg : Dict[str, Any]
+ The configuration dictionary.
+
+ Returns:
+ ----------
+ Tuple[Type[gym.Env], Dict[str, Any], Type[gym.Env], Dict[str, Any]]
+ A tuple containing the training and testing environments along with their respective local control dictionaries.
+ """ + limit_cart = None + reward_end = None + pos_tol = None + if 'limit_cart' in cfg: + limit_cart = cfg['limit_cart'] + if 'reward_end' in cfg: + reward_end = cfg['reward_end'] + if 'pos_tol' in cfg: + pos_tol = cfg['pos_tol'] + env_train, local_control_dict_train = get_env_and_control(name=cfg['env'], + orig_cwd=cfg['orig_cwd'], + device=cfg['device'], + limit_cart=limit_cart, + reward_end=reward_end, + pos_tol=pos_tol + ) + env_test, local_control_dict_test = get_env_and_control(name=cfg['env'], + orig_cwd=cfg['orig_cwd'], + device=cfg['device'], + limit_cart=limit_cart, + reward_end=reward_end, + pos_tol=pos_tol + ) + return env_train, local_control_dict_train, env_test, local_control_dict_test + + +def create_agent(cfg: Dict[str, Any], + agent_name: str, + policy: torch.nn.Module, + sampler_policy: SamplerPolicy, + qf1: torch.nn.Module, + qf2: torch.nn.Module, + target_qf1: torch.nn.Module, + target_qf2: torch.nn.Module, + lambda_s: Optional[LambdaS] = None, + local_expert: Optional[Any] = None, + parametrized_perturbation: Optional[Type[torch.nn.Module]] = None, + sampler_parametrized_perturbation: Optional[Type[ExpertSamplerPolicy]] = None) \ + -> Any: + """ + Create an instance of an RL agent based on the specified configuration and components. + + Parameters: + ---------- + cfg : Dict[str, Any] + The configuration dictionary. + agent_name : str + The name of the agent to be created. + policy : Type[torch.nn.Module] + The policy network. + sampler_policy : Type[SamplerPolicy] + The policy sampler. + qf1 : Type[torch.nn.Module] + The first critic network. + qf2 : Type[torch.nn.Module] + The second critic network. + target_qf1 : Type[torch.nn.Module] + The target network for the first critic. + target_qf2 : Type[torch.nn.Module] + The target network for the second critic. + lambda_s : Optional[Type[LambdaS]] + The lambda_s confidence class (optional). + local_expert : Optional[Type[Any]] + The local expert (optional, and can be under any form). + parametrized_perturbation : Optional[Type[torch.nn.Module]] + The parametrized perturbation network (optional). + sampler_parametrized_perturbation : Optional[Type[ExpertSamplerPolicy]] + The sampler for the parametrized perturbation network (optional). + + Returns: + ---------- + Any + An instance of the specified RL agent. + """ + if cfg['agent_name'] == 'SAC': + agent = dict_agents[agent_name](cfg, + policy, + sampler_policy, + qf1, + qf2, + target_qf1, + target_qf2) + elif cfg['agent_name'] == 'SAG': + agent = dict_agents[agent_name](cfg, + policy, + sampler_policy, + qf1, + qf2, + target_qf1, + target_qf2, + use_local=lambda_s, + local_expert=local_expert) + elif cfg['agent_name'] == 'PIG': + agent = dict_agents[agent_name](cfg, + policy, + sampler_policy, + qf1, + qf2, + target_qf1, + target_qf2, + use_local=lambda_s, + local_expert=local_expert, + beta=cfg['beta']) + else: + agent = dict_agents[agent_name](cfg, + policy, + sampler_policy, + qf1, + qf2, + target_qf1, + target_qf2, + use_local=lambda_s, + local_expert=local_expert, + parametrized_perturbation=parametrized_perturbation, + sampler_parametrized_perturbation=sampler_parametrized_perturbation) + return agent diff --git a/RLLG/agents/common/model.py b/RLLG/agents/common/model.py index dce82d22..717b8279 100644 --- a/RLLG/agents/common/model.py +++ b/RLLG/agents/common/model.py @@ -1,297 +1,653 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. 
- -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - -import numpy as np -import torch -import torch.nn as nn -from torch.distributions import Normal -from torch.distributions.transformed_distribution import TransformedDistribution -from torch.distributions.transforms import TanhTransform - - -def extend_and_repeat(tensor, dim, repeat): - # Extend and repeast the tensor along dim axie and repeat it - ones_shape = [1 for _ in range(tensor.ndim + 1)] - ones_shape[dim] = repeat - return torch.unsqueeze(tensor, dim) * tensor.new_ones(ones_shape) - - -def soft_target_update(network, target_network, soft_target_update_rate): - target_network_params = {k: v for k, v in target_network.named_parameters()} - for k, v in network.named_parameters(): - target_network_params[k].data = ( - (1 - soft_target_update_rate) * target_network_params[k].data - + soft_target_update_rate * v.data - ) - - -class FullyConnectedNetwork(nn.Module): - - def __init__(self, input_dim, output_dim, arch='256-256', - activation="relu", return_last_layer=False): - super().__init__() - self.input_dim = input_dim - self.output_dim = output_dim - self.arch = arch - self.activation = activation - self.return_last_layer = return_last_layer - - d = input_dim - modules = [] - hidden_sizes = [int(h) for h in arch.split('-')] - - for hidden_size in hidden_sizes: - fc = nn.Linear(d, hidden_size) - modules.append(fc) - if self.activation == 'relu': - modules.append(nn.ReLU()) - elif self.activation == 'tanh': - modules.append(nn.Tanh()) - else: - raise NotImplementedError(f'activation is {self.activation}') - d = hidden_size - - last_fc = nn.Linear(d, output_dim) - - if self.return_last_layer: - self.network_but_last = nn.Sequential(*modules) - self.last_fc = last_fc - else: - modules.append(last_fc) - self.network = nn.Sequential(*modules) - - def forward(self, input_tensor): - if self.return_last_layer: - last_layer = self.network_but_last(input_tensor) - return self.last_fc(last_layer), last_layer.clone() - return self.network(input_tensor) - - -class ReparameterizedTanhGaussian(nn.Module): - - def __init__(self, log_std_min=-20.0, log_std_max=2.0, no_tanh=False): - super().__init__() - self.log_std_min = log_std_min - self.log_std_max = log_std_max - self.no_tanh = no_tanh - - def log_prob(self, mean, log_std, sample): - log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) - std = torch.exp(log_std) - if self.no_tanh: - action_distribution = Normal(mean, std) - else: - action_distribution = TransformedDistribution( - Normal(mean, std), TanhTransform(cache_size=1) - ) - return torch.sum(action_distribution.log_prob(sample), dim=-1) - - def forward(self, mean, log_std, deterministic=False): - log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) - std = torch.exp(log_std) - - if self.no_tanh: - action_distribution = Normal(mean, std) - else: - action_distribution = TransformedDistribution( - Normal(mean, std), TanhTransform(cache_size=1) - ) - - if deterministic: - action_sample = torch.tanh(mean) - else: - action_sample = action_distribution.rsample() - - log_prob = torch.sum( - action_distribution.log_prob(action_sample), dim=-1 - ) - - return action_sample, log_prob - - -class TanhGaussianPolicy(nn.Module): - - def __init__(self, observation_dim, action_dim, arch='256-256', - log_std_multiplier=1.0, log_std_offset=-1.0, no_tanh=False, - activation='relu'): - super().__init__() - self.observation_dim = observation_dim - 
self.action_dim = action_dim - self.arch = arch - self.no_tanh = no_tanh - - self.base_network = FullyConnectedNetwork( - observation_dim, 2 * action_dim, arch, - activation=activation - ) - self.log_std_multiplier = Scalar(log_std_multiplier) - self.log_std_offset = Scalar(log_std_offset) - self.tanh_gaussian = ReparameterizedTanhGaussian(no_tanh=no_tanh) - - def log_prob(self, observations, actions): - if actions.ndim == 3: - observations = extend_and_repeat(observations, 1, actions.shape[1]) - base_network_output = self.base_network(observations) - mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) - log_std = self.log_std_multiplier() * log_std + self.log_std_offset() - return self.tanh_gaussian.log_prob(mean, log_std, actions) - - def forward(self, observations, deterministic=False, repeat=None): - if repeat is not None: - observations = extend_and_repeat(observations, 1, repeat) - base_network_output = self.base_network(observations) - mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) - log_std = self.log_std_multiplier() * log_std + self.log_std_offset() - return self.tanh_gaussian(mean, log_std, deterministic) - - - -class ParametrizedPerturbationTanhGaussianPolicy(nn.Module): - - def __init__(self, - observation_dim, - action_dim, - arch='256-256', - log_std_multiplier=1.0, - log_std_offset=-1.0, - no_tanh=False, - activation='relu', - phi=0.5): - super().__init__() - self.observation_dim = observation_dim - self.action_dim = action_dim - self.arch = arch - self.no_tanh = no_tanh - self.phi = phi - - self.base_network = FullyConnectedNetwork( - observation_dim, 2 * action_dim, arch, - activation=activation - ) - self.log_std_multiplier = Scalar(log_std_multiplier) - self.log_std_offset = Scalar(log_std_offset) - self.tanh_gaussian = ReparameterizedTanhGaussian(no_tanh=no_tanh) - - def log_prob(self, observations, actions, expert_actions): - - if actions.ndim == 3: - observations = extend_and_repeat(observations, 1, actions.shape[1]) - base_network_output = self.base_network(observations) - mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) - log_std = self.log_std_multiplier() * log_std + self.log_std_offset() - - # get reversed actions to get the log prob of the expert parametrized policy - phi_actions = (actions - expert_actions) / self.phi - - return self.tanh_gaussian.log_prob(mean, log_std, phi_actions) - - def forward(self, observations, expert_actions, beta=0., deterministic=False, repeat=None): - if repeat is not None: - observations = extend_and_repeat(observations, 1, repeat) - base_network_output = self.base_network(observations) - mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) - log_std = self.log_std_multiplier() * log_std + self.log_std_offset() - actions, log_probs = self.tanh_gaussian(mean, log_std, deterministic) - return (expert_actions + self.phi * (1 - beta) * actions).clamp(-0.999, 0.999), log_probs - - -class SamplerPolicy(object): - - def __init__(self, policy, device, from_ext=False): - self.policy = policy - self.device = device - self.from_ext = from_ext - - def __call__(self, observations, deterministic=False): - with torch.no_grad(): - observations = torch.tensor( - observations, dtype=torch.float32, device=self.device - ) - actions, _ = self.policy(observations, deterministic) - actions = actions.cpu().numpy() - return np.clip(actions, a_min=-0.999, a_max=0.999) - - -class ExpertSamplerPolicy(object): - - def __init__(self, policy, device, from_ext=False): 
- self.policy = policy - self.device = device - self.from_ext = from_ext - - def __call__(self, observations, expert_actions, beta=1., deterministic=False): - with torch.no_grad(): - observations = torch.tensor( - observations, dtype=torch.float32, device=self.device - ) - expert_actions = torch.tensor( - expert_actions, dtype=torch.float32, device=self.device - ) - actions, _ = self.policy(observations, expert_actions, - beta=beta, - deterministic=deterministic) - actions = actions.cpu().numpy() - return np.clip(actions, a_min=-0.999, a_max=0.999) - - -class FullyConnectedQFunction(nn.Module): - - def __init__(self, observation_dim, action_dim, arch='256-256', - activation='relu', return_last_layer=False): - super().__init__() - self.observation_dim = observation_dim - self.action_dim = action_dim - self.arch = arch - self.return_last_layer = return_last_layer - self.network = FullyConnectedNetwork( - observation_dim + action_dim, 1, arch, activation=activation, - return_last_layer=return_last_layer - ) - - def forward(self, observations, actions): - if actions.ndim == 3 and observations.ndim == 2: - observations = extend_and_repeat(observations, 1, actions.shape[1]) - input_tensor = torch.cat([observations, actions], dim=-1) - if self.return_last_layer: - output, last_layer = self.network(input_tensor) - return torch.squeeze(output, dim=-1), last_layer - output = self.network(input_tensor) - return torch.squeeze(output, dim=-1) - - -class TD3Policy(nn.Module): - - def __init__(self, observation_dim, action_dim, arch='256-256'): - super(TD3Policy, self).__init__() - self.arch = arch - - self.base_network = FullyConnectedNetwork( - observation_dim, action_dim, arch - ) - - - def forward(self, observation, deterministic=False): - """ - Added the deterministic argument to be consitent with the code. - """ - a_init = self.base_network(observation) - return torch.tanh(a_init), None - - -class Scalar(nn.Module): - def __init__(self, init_value): - super().__init__() - self.constant = nn.Parameter( - torch.tensor(init_value, dtype=torch.float32) - ) - - def forward(self): - return self.constant +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Tuple, Optional, Union +import numpy as np +import torch +import torch.nn as nn +from torch.distributions import Normal +from torch.distributions.transformed_distribution import TransformedDistribution +from torch.distributions.transforms import TanhTransform + + +def extend_and_repeat(tensor: torch.Tensor, dim: int, repeat: int) -> torch.Tensor: + """ + Extend and repeat the tensor along the specified axis. + + Parameters: + ---------- + tensor : torch.Tensor + Input tensor. + dim : int + Dimension along which to extend and repeat. + repeat : int + Number of times to repeat the tensor. + + Returns: + ---------- + torch.Tensor + Extended and repeated tensor. + """ + # Extend and repeast the tensor along dim axie and repeat it + ones_shape = [1 for _ in range(tensor.ndim + 1)] + ones_shape[dim] = repeat + return torch.unsqueeze(tensor, dim) * tensor.new_ones(ones_shape) + + +def soft_target_update(network: nn.Module, target_network: nn.Module, soft_target_update_rate: float) -> None: + """ + Update the target network parameters using a soft update. + + Parameters: + ---------- + network : nn.Module + The source network. 
+ target_network : nn.Module + The target network to be updated. + soft_target_update_rate : float + The soft update rate. + + Returns: + ---------- + None + """ + target_network_params = {k: v for k, v in target_network.named_parameters()} + for k, v in network.named_parameters(): + target_network_params[k].data = ( + (1 - soft_target_update_rate) * target_network_params[k].data + + soft_target_update_rate * v.data + ) + + +class FullyConnectedNetwork(nn.Module): + """ + Fully connected neural network module. + + Parameters: + ---------- + input_dim : int + Dimension of the input. + output_dim : int + Dimension of the output. + arch : str, optional + Architecture of the network (default is '256-256'). + activation : str, optional + Activation function (default is 'relu'). + return_last_layer : bool, optional + Whether to return only the last layer (default is False). + """ + + def __init__(self, input_dim: int, output_dim: int, arch: Optional[str] = '256-256', + activation: Optional[str] = "relu", return_last_layer: Optional[bool] = False): + super().__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.arch = arch + self.activation = activation + self.return_last_layer = return_last_layer + + d = input_dim + modules = [] + hidden_sizes = [int(h) for h in arch.split('-')] + + for hidden_size in hidden_sizes: + fc = nn.Linear(d, hidden_size) + modules.append(fc) + if self.activation == 'relu': + modules.append(nn.ReLU()) + elif self.activation == 'tanh': + modules.append(nn.Tanh()) + else: + raise NotImplementedError(f'activation is {self.activation}') + d = hidden_size + + last_fc = nn.Linear(d, output_dim) + + if self.return_last_layer: + self.network_but_last = nn.Sequential(*modules) + self.last_fc = last_fc + else: + modules.append(last_fc) + self.network = nn.Sequential(*modules) + + def forward(self, input_tensor: torch.Tensor) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + """ + Forward pass through the network. + + Parameters: + ---------- + input_tensor : torch.Tensor + Input tensor. + + Returns: + ---------- + torch.Tensor or Tuple[torch.Tensor, torch.Tensor] + The output of the network, and optionally, the output of the last layer. + """ + if self.return_last_layer: + last_layer = self.network_but_last(input_tensor) + return self.last_fc(last_layer), last_layer.clone() + return self.network(input_tensor) + + +class ReparameterizedTanhGaussian(nn.Module): + """ + Tanh Gaussian distribution with reparametrized trick. + + Parameters: + ---------- + log_std_min : Optional[float], optional + Minimum value for the log standard deviation (default is -20.0). + log_std_max : Optional[float], optional + Maximum value for the log standard deviation (default is 2.0). + no_tanh : Optional[bool], optional + Whether to skip applying tanh to the sampled actions (default is False). + """ + + def __init__(self, log_std_min: Optional[float] = -20.0, + log_std_max: Optional[float] = 2.0, + no_tanh: Optional[bool] = False): + super().__init__() + self.log_std_min = log_std_min + self.log_std_max = log_std_max + self.no_tanh = no_tanh + + def log_prob(self, mean: torch.Tensor, log_std: torch.Tensor, sample: torch.Tensor) -> torch.Tensor: + """ + Compute the log probability of a sample under the distribution. + + Parameters: + ---------- + mean : torch.Tensor + Mean of the distribution. + log_std : torch.Tensor + Log standard deviation of the distribution. + sample : torch.Tensor + Sample to compute the log probability for. 
+ + Returns: + ---------- + torch.Tensor + Log probability of the sample. + """ + log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + std = torch.exp(log_std) + if self.no_tanh: + action_distribution = Normal(mean, std) + else: + action_distribution = TransformedDistribution( + Normal(mean, std), TanhTransform(cache_size=1) + ) + return torch.sum(action_distribution.log_prob(sample), dim=-1) + + def forward(self, mean: torch.Tensor, log_std: torch.Tensor, deterministic: Optional[bool] = False) -> Tuple[ + torch.Tensor, torch.Tensor]: + """ + Generate a sample and compute the log probability. + + Parameters: + ---------- + mean : torch.Tensor + Mean of the distribution. + log_std : torch.Tensor + Log standard deviation of the distribution. + deterministic : bool, optional + Flag indicating whether to sample deterministically (default is False). + + Returns: + ---------- + Tuple[torch.Tensor, torch.Tensor] + Generated action sample and its log probability. + """ + log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + std = torch.exp(log_std) + + if self.no_tanh: + action_distribution = Normal(mean, std) + else: + action_distribution = TransformedDistribution( + Normal(mean, std), TanhTransform(cache_size=1) + ) + + if deterministic: + action_sample = torch.tanh(mean) + else: + action_sample = action_distribution.rsample() + + log_prob = torch.sum( + action_distribution.log_prob(action_sample), dim=-1 + ) + + return action_sample, log_prob + + +class TanhGaussianPolicy(nn.Module): + """ + Policy module representing a Tanh Gaussian policy. + + Parameters: + ---------- + observation_dim : int + Dimensionality of the observation space. + action_dim : int + Dimensionality of the action space. + arch : str, optional + Architecture of the base network (default is '256-256'). + log_std_multiplier : float, optional + Multiplier for the log standard deviation (default is 1.0). + log_std_offset : float, optional + Offset for the log standard deviation (default is -1.0). + no_tanh : bool, optional + Whether to skip applying tanh to the sampled actions (default is False). + activation : str, optional + Activation function used in the base network (default is 'relu'). + """ + + def __init__(self, observation_dim: int, action_dim: int, arch: Optional[str] = '256-256', + log_std_multiplier: Optional[float] = 1.0, log_std_offset: Optional[float] = -1.0, + no_tanh: Optional[bool] = False, activation: Optional[str] = 'relu'): + super().__init__() + self.observation_dim = observation_dim + self.action_dim = action_dim + self.arch = arch + self.no_tanh = no_tanh + + self.base_network = FullyConnectedNetwork( + observation_dim, 2 * action_dim, arch, + activation=activation + ) + self.log_std_multiplier = Scalar(log_std_multiplier) + self.log_std_offset = Scalar(log_std_offset) + self.tanh_gaussian = ReparameterizedTanhGaussian(no_tanh=no_tanh) + + def log_prob(self, observations: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Compute the log probability of a given set of actions. + + Parameters: + ---------- + observations : torch.Tensor + Observations to condition the policy on. + actions : torch.Tensor + Actions for which to compute the log probability. + + Returns: + ---------- + torch.Tensor + Log probability of the given actions. 
+ """ + if actions.ndim == 3: + observations = extend_and_repeat(observations, 1, actions.shape[1]) + base_network_output = self.base_network(observations) + mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) + log_std = self.log_std_multiplier() * log_std + self.log_std_offset() + return self.tanh_gaussian.log_prob(mean, log_std, actions) + + def forward(self, observations: torch.Tensor, deterministic: bool = False, repeat: Optional[int] = None) -> Tuple[ + torch.Tensor, torch.Tensor]: + """ + Generate a sample and compute the log probability. + + Parameters: + ---------- + observations : torch.Tensor + Observations to condition the policy on. + deterministic : bool, optional + Flag indicating whether to sample deterministically (default is False). + repeat : Optional[int], optional + Number of times to repeat the action sampling (default is None). + + Returns: + ---------- + Tuple[torch.Tensor, torch.Tensor] + Generated action sample and its log probability. + """ + if repeat is not None: + observations = extend_and_repeat(observations, 1, repeat) + base_network_output = self.base_network(observations) + mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) + log_std = self.log_std_multiplier() * log_std + self.log_std_offset() + return self.tanh_gaussian(mean, log_std, deterministic) + + + +class ParametrizedPerturbationTanhGaussianPolicy(nn.Module): + """ + Policy module representing the parametrized perturbation Tanh Gaussian policy. + + Parameters: + ---------- + observation_dim : int + Dimensionality of the observation space. + action_dim : int + Dimensionality of the action space. + arch : str, optional + Architecture of the base network (default is '256-256'). + log_std_multiplier : float, optional + Multiplier for the log standard deviation (default is 1.0). + log_std_offset : float, optional + Offset for the log standard deviation (default is -1.0). + no_tanh : bool, optional + Whether to skip applying tanh to the sampled actions (default is False). + activation : str, optional + Activation function used in the base network (default is 'relu'). + phi : float, optional + Phi parameter for the perturbation (default is 0.5). + """ + + def __init__(self, + observation_dim: int, + action_dim: int, + arch: Optional[str] = '256-256', + log_std_multiplier: Optional[float] = 1.0, + log_std_offset: Optional[float] = -1.0, + no_tanh: Optional[bool] = False, + activation: Optional[str] = 'relu', + phi: Optional[float] = 0.5): + super().__init__() + self.observation_dim = observation_dim + self.action_dim = action_dim + self.arch = arch + self.no_tanh = no_tanh + self.phi = phi + + self.base_network = FullyConnectedNetwork( + observation_dim, 2 * action_dim, arch, + activation=activation + ) + self.log_std_multiplier = Scalar(log_std_multiplier) + self.log_std_offset = Scalar(log_std_offset) + self.tanh_gaussian = ReparameterizedTanhGaussian(no_tanh=no_tanh) + + def log_prob(self, observations: torch.Tensor, actions: torch.Tensor, expert_actions: torch.Tensor) -> torch.Tensor: + """ + Compute the log probability of a given set of actions with respect to expert actions. + + Parameters: + ---------- + observations : torch.Tensor + Observations to condition the policy on. + actions : torch.Tensor + Actions for which to compute the log probability. + expert_actions : torch.Tensor + Expert actions to condition the policy on. + + Returns: + ---------- + torch.Tensor + Log probability of the given actions with respect to expert actions. 
+ """ + if actions.ndim == 3: + observations = extend_and_repeat(observations, 1, actions.shape[1]) + base_network_output = self.base_network(observations) + mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) + log_std = self.log_std_multiplier() * log_std + self.log_std_offset() + + # get reversed actions to get the log prob of the expert parametrized policy + phi_actions = (actions - expert_actions) / self.phi + + return self.tanh_gaussian.log_prob(mean, log_std, phi_actions) + + def forward(self, observations: torch.Tensor, expert_actions: torch.Tensor, beta: float = 0., + deterministic: bool = False, repeat: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Generate a sample and compute the log probability with respect to expert actions. + + Parameters: + ---------- + observations : torch.Tensor + Observations to condition the policy on. + expert_actions : torch.Tensor + Expert actions to condition the policy on. + beta : float, optional + Beta parameter for the perturbation (default is 0.). + deterministic : bool, optional + Flag indicating whether to sample deterministically (default is False). + repeat : Optional[int], optional + Number of times to repeat the action sampling (default is None). + + Returns: + ---------- + Tuple[torch.Tensor, torch.Tensor] + Generated action sample and its log probability. + """ + if repeat is not None: + observations = extend_and_repeat(observations, 1, repeat) + base_network_output = self.base_network(observations) + mean, log_std = torch.split(base_network_output, self.action_dim, dim=-1) + log_std = self.log_std_multiplier() * log_std + self.log_std_offset() + actions, log_probs = self.tanh_gaussian(mean, log_std, deterministic) + return (expert_actions + self.phi * (1 - beta) * actions).clamp(-0.999, 0.999), log_probs + + +class SamplerPolicy(object): + """ + Wrapper class for creating a callable policy for action sampling. + + Parameters: + ---------- + policy : nn.Module + Policy module used for action sampling. + device : torch.device + Device on which to perform the action sampling. + from_ext : bool, optional + Flag indicating whether the policy is from an external source (default is False). + """ + + def __init__(self, policy: nn.Module, device: torch.device, from_ext: bool = False): + self.policy = policy + self.device = device + self.from_ext = from_ext + + def __call__(self, observations: Union[torch.Tensor, np.ndarray], deterministic: bool = False) -> np.ndarray: + """ + Sample actions from the policy. + + Parameters: + ---------- + observations : Union[torch.Tensor, np.ndarray] + Observations to condition the policy on. + deterministic : bool, optional + Flag indicating whether to sample deterministically (default is False). + + Returns: + ---------- + np.ndarray + Sampled actions. + """ + with torch.no_grad(): + observations = torch.tensor( + observations, dtype=torch.float32, device=self.device + ) + actions, _ = self.policy(observations, deterministic) + actions = actions.cpu().numpy() + return np.clip(actions, a_min=-0.999, a_max=0.999) + + +class ExpertSamplerPolicy(object): + """ + Wrapper class for creating a callable policy for expert action sampling. + + Parameters: + ---------- + policy : nn.Module + Policy module used for expert action sampling. + device : torch.device + Device on which to perform the expert action sampling. + from_ext : bool, optional + Flag indicating whether the policy is from an external source (default is False). 
+ """ + + def __init__(self, policy: nn.Module, device: torch.device, from_ext: bool = False): + self.policy = policy + self.device = device + self.from_ext = from_ext + + def __call__(self, observations: Union[torch.Tensor, np.ndarray], + expert_actions: Union[torch.Tensor, np.ndarray], beta: float = 1., + deterministic: bool = False) -> np.ndarray: + """ + Sample expert actions from the policy. + + Parameters: + ---------- + observations : Union[torch.Tensor, np.ndarray] + Observations to condition the expert policy on. + expert_actions : Union[torch.Tensor, np.ndarray] + Expert actions to condition the expert policy on. + beta : float, optional + Weighting factor for blending expert actions (default is 1.). + deterministic : bool, optional + Flag indicating whether to sample expert actions deterministically (default is False). + + Returns: + ---------- + np.ndarray + Sampled expert actions. + """ + with torch.no_grad(): + observations = torch.tensor( + observations, dtype=torch.float32, device=self.device + ) + expert_actions = torch.tensor( + expert_actions, dtype=torch.float32, device=self.device + ) + actions, _ = self.policy(observations, expert_actions, + beta=beta, + deterministic=deterministic) + actions = actions.cpu().numpy() + return np.clip(actions, a_min=-0.999, a_max=0.999) + + +class FullyConnectedQFunction(nn.Module): + """ + Fully connected Q-function neural network. + + Parameters: + ---------- + observation_dim : int + Dimension of the observation space. + action_dim : int + Dimension of the action space. + arch : str, optional + Architecture configuration for the fully connected layers (default is '256-256'). + activation : str, optional + Activation function to use in the hidden layers (default is 'relu'). + return_last_layer : bool, optional + Whether to return the activations of the last hidden layer (default is False). + """ + + def __init__(self, observation_dim: int, action_dim: int, arch: Optional[str] = '256-256', + activation: Optional[str] = 'relu', return_last_layer: Optional[bool] = False): + super().__init__() + self.observation_dim = observation_dim + self.action_dim = action_dim + self.arch = arch + self.return_last_layer = return_last_layer + self.network = FullyConnectedNetwork( + observation_dim + action_dim, 1, arch, activation=activation, + return_last_layer=return_last_layer + ) + + def forward(self, observations: torch.Tensor, actions: torch.Tensor) -> torch.Tensor: + """ + Forward pass of the Q-function. + + Parameters: + ---------- + observations : torch.Tensor + Input observations. + actions : torch.Tensor + Input actions. + + Returns: + ---------- + torch.Tensor + Q-values for the given observations and actions. + """ + if actions.ndim == 3 and observations.ndim == 2: + observations = extend_and_repeat(observations, 1, actions.shape[1]) + input_tensor = torch.cat([observations, actions], dim=-1) + if self.return_last_layer: + output, last_layer = self.network(input_tensor) + return torch.squeeze(output, dim=-1), last_layer + output = self.network(input_tensor) + return torch.squeeze(output, dim=-1) + + +class TD3Policy(nn.Module): + """ + Twin Delayed DDPG (TD3) policy network. + + Parameters: + ---------- + observation_dim : int + Dimension of the observation space. + action_dim : int + Dimension of the action space. + arch : str, optional + Architecture configuration for the fully connected layers (default is '256-256'). 
+ """ + + def __init__(self, observation_dim: int, action_dim: int, arch: str = '256-256'): + super(TD3Policy, self).__init__() + self.arch = arch + + self.base_network = FullyConnectedNetwork( + observation_dim, action_dim, arch + ) + + def forward(self, observation: torch.Tensor, deterministic: Optional[bool] = False) -> Tuple[torch.Tensor, None]: + """ + Forward pass of the TD3 policy network. + + Parameters: + ---------- + observation : torch.Tensor + Input observation. + deterministic : bool, optional + Whether to use deterministic policy (default is False). Added it for code consistency. + + Returns: + ---------- + Tuple[torch.Tensor, None] + Tuple containing the action tensor and None (no auxiliary information). + """ + a_init = self.base_network(observation) + return torch.tanh(a_init), None + + +class Scalar(nn.Module): + """ + Scalar value represented as a learnable parameter. + + Parameters: + ---------- + init_value : float + Initial value for the scalar. + """ + + def __init__(self, init_value: float): + super().__init__() + self.constant = nn.Parameter( + torch.tensor(init_value, dtype=torch.float32) + ) + + def forward(self) -> torch.Tensor: + """ + Forward pass to retrieve the scalar value. + + Returns: + ---------- + torch.Tensor + Learnable scalar value. + """ + return self.constant diff --git a/RLLG/agents/common/replay_buffer.py b/RLLG/agents/common/replay_buffer.py index 4c5e1c6d..8e04be60 100644 --- a/RLLG/agents/common/replay_buffer.py +++ b/RLLG/agents/common/replay_buffer.py @@ -1,180 +1,403 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
- - - -import numpy as np -import torch - - -class ReplayBuffer(object): - def __init__(self, max_size, data=None, nb_local_experts=0): - self._max_size = max_size - self._next_idx = 0 - self._size = 0 - self._initialized = False - self._total_steps = 0 - self.nb_local_experts = nb_local_experts - - if data is not None: - if self._max_size < data['observations'].shape[0]: - self._max_size = data['observations'].shape[0] - self.add_batch(data) - - def __len__(self): - return self._size - - def _init_storage(self, observation_dim, action_dim): - self._observation_dim = observation_dim - self._action_dim = action_dim - self._observations = np.zeros((self._max_size, observation_dim), dtype=np.float32) - self._next_observations = np.zeros((self._max_size, observation_dim), dtype=np.float32) - self._actions = np.zeros((self._max_size, action_dim), dtype=np.float32) - self._rewards = np.zeros(self._max_size, dtype=np.float32) - self._dones = np.zeros(self._max_size, dtype=np.float32) - self._use_local_current = np.zeros(self._max_size, dtype=np.float32) - self._use_local_next = np.zeros(self._max_size, dtype=np.float32) - self._expert_actions = np.zeros((self._max_size, action_dim), dtype=np.float32) - self._next_expert_actions = np.zeros((self._max_size, action_dim), dtype=np.float32) - self._next_idx = 0 - self._size = 0 - self._initialized = True - - def add_sample(self, observation, action, reward, next_observation, done, - use_local_current, use_local_next, expert_actions, next_expert_actions): - if not self._initialized: - self._init_storage(observation.size, action.size) - - self._observations[self._next_idx, :] = np.array(observation, dtype=np.float32) - self._next_observations[self._next_idx, :] = np.array(next_observation, dtype=np.float32) - self._actions[self._next_idx, :] = np.array(action, dtype=np.float32) - self._rewards[self._next_idx] = reward - self._dones[self._next_idx] = float(done) - - # use locals - self._use_local_current[self._next_idx] = float(use_local_current) - self._use_local_next[self._next_idx] = float(use_local_next) - - # actions - self._expert_actions[self._next_idx] = np.array(expert_actions, dtype=np.float32) - self._next_expert_actions[self._next_idx] = np.array(next_expert_actions, dtype=np.float32) - - if self._size < self._max_size: - self._size += 1 - self._next_idx = (self._next_idx + 1) % self._max_size - self._total_steps += 1 - - def add_traj(self, observations, actions, rewards, next_observations, dones, - use_local_current, use_local_next, expert_actions, next_expert_actions): - for o, a, r, no, d, u_c, u_n, ea, nea in zip(observations, actions, rewards, next_observations, dones, - use_local_current, use_local_next, - expert_actions, next_expert_actions): - self.add_sample(o, a, r, no, d, u_c, u_n, ea, nea) - - def add_batch(self, batch): - self.add_traj( - batch['observations'], - batch['actions'], - batch['rewards'], - batch['next_observations'], - batch['dones'], - batch['use_local_current'], - batch['use_local_next'], - batch['expert_actions'], - batch['next_expert_actions'], - ) - - def sample(self, batch_size): - indices = np.random.randint(len(self), size=batch_size) - return self.select(indices) - - def select(self, indices): - # select expert if any - use_locals_current, use_locals_next = {}, {} - expert_actions, next_expert_actions = {}, {} - use_local_current = self._use_local_current[indices, ...] - use_local_next = self._use_local_next[indices, ...] - expert_actions = self._expert_actions[indices, ...] 
- next_expert_actions = self._next_expert_actions[indices, ...] - - return dict( - observations=self._observations[indices, ...], - actions=self._actions[indices, ...], - rewards=self._rewards[indices, ...], - next_observations=self._next_observations[indices, ...], - dones=self._dones[indices, ...], - use_local_current=use_local_current, - use_local_next=use_local_next, - expert_actions=expert_actions, - next_expert_actions=next_expert_actions - ) - - def generator(self, batch_size, n_batchs=None): - i = 0 - while n_batchs is None or i < n_batchs: - yield self.sample(batch_size) - i += 1 - - @property - def total_steps(self): - return self._total_steps - - @property - def data(self): - return dict( - observations=self._observations[:self._size, ...], - actions=self._actions[:self._size, ...], - rewards=self._rewards[:self._size, ...], - next_observations=self._next_observations[:self._size, ...], - dones=self._dones[:self._size, ...], - use_local_current=self._use_local_current[:self._size, ...], - use_local_next=self._use_local_next[:self._size, ...], - expert_actions=self._expert_actions[:self._size, ...], - next_expert_actions=self._next_expert_actions[:self._size, ...], - ) - - -def batch_to_torch(batch, device): - return { - k: torch.from_numpy(v).to(device=device, non_blocking=True) if type(v) is np.ndarray - else {nb: torch.from_numpy(v[nb]).to(device=device, non_blocking=True) for nb in range(len(v))} - for k, v in batch.items() - } - - -def subsample_batch(batch, size): - indices = np.random.randint(batch['observations'].shape[0], size=size) - - return dict( - observations=batch['observations'][indices, ...], - actions=batch['actions'][indices, ...], - rewards=batch['rewards'][indices, ...], - next_observations=batch['next_observations'][indices, ...], - dones=batch['dones'][indices, ...], - use_local_current=batch['use_local_current'][indices, ...], - use_local_next=batch['use_local_next'][indices, ...], - expert_actions=batch['expert_actions'][indices, ...], - next_expert_actions=batch['next_expert_actions'][indices, ...], - ) - - -def concatenate_batches(batches): - - return dict( - observations=np.concatenate([batch['observations'] for batch in batches], axis=0).astype(np.float32), - actions=np.concatenate([batch['actions'] for batch in batches], axis=0).astype(np.float32), - rewards=np.concatenate([batch['rewards'] for batch in batches], axis=0).astype(np.float32), - next_observations=np.concatenate([batch['next_observations'] for batch in batches], axis=0).astype(np.float32), - dones=np.concatenate([batch['dones'] for batch in batches], axis=0).astype(np.float32), - use_locals_current=np.concatenate([batch['use_locals_current'] for batch in batches], axis=0).astype(np.float32), - use_locals_next=np.concatenate([batch['use_locals_next'] for batch in batches], axis=0).astype(np.float32), - expert_actions=np.concatenate([batch['expert_actions'] for batch in batches], axis=0).astype(np.float32), - next_expert_actions=np.concatenate([batch['next_expert_actions'] for batch in batches], axis=0).astype(np.float32), - ) +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Optional, Dict, Union, List, Generator, Any +import numpy as np +import torch + + +class ReplayBuffer(object): + """ + Replay buffer for storing and sampling transitions. 
+ + Parameters: + ---------- + max_size : int + Maximum size of the replay buffer. + data : dict, optional + Initial data to populate the replay buffer. + nb_local_experts : int, optional + Number of local experts (default is 0). + """ + + def __init__(self, max_size: int, data: Optional[Dict[str, np.ndarray]] = None, nb_local_experts: Optional[int] = 0): + self._max_size = max_size + self._next_idx = 0 + self._size = 0 + self._initialized = False + self._total_steps = 0 + self.nb_local_experts = nb_local_experts + + if data is not None: + if self._max_size < data['observations'].shape[0]: + self._max_size = data['observations'].shape[0] + self.add_batch(data) + + def __len__(self) -> int: + """ + Get the current size of the replay buffer. + + Returns: + ---------- + int + Current size of the replay buffer. + """ + return self._size + + def _init_storage(self, observation_dim: int, action_dim: int) -> None: + """ + Initialize the storage arrays. + + Parameters: + ---------- + observation_dim : int + Dimensionality of the observations. + action_dim : int + Dimensionality of the actions. + + Returns: + ---------- + None + """ + self._observation_dim = observation_dim + self._action_dim = action_dim + self._observations = np.zeros((self._max_size, observation_dim), dtype=np.float32) + self._next_observations = np.zeros((self._max_size, observation_dim), dtype=np.float32) + self._actions = np.zeros((self._max_size, action_dim), dtype=np.float32) + self._rewards = np.zeros(self._max_size, dtype=np.float32) + self._dones = np.zeros(self._max_size, dtype=np.float32) + self._use_local_current = np.zeros(self._max_size, dtype=np.float32) + self._use_local_next = np.zeros(self._max_size, dtype=np.float32) + self._expert_actions = np.zeros((self._max_size, action_dim), dtype=np.float32) + self._next_expert_actions = np.zeros((self._max_size, action_dim), dtype=np.float32) + self._next_idx = 0 + self._size = 0 + self._initialized = True + + def add_sample(self, + observation: np.ndarray, + action: np.ndarray, + reward: float, + next_observation: np.ndarray, + done: bool, + use_local_current: float, + use_local_next: float, + expert_actions: np.ndarray, + next_expert_actions: np.ndarray): + """ + Add a single transition to the replay buffer. + + Parameters: + ---------- + observation : np.ndarray + Observation array. + action : np.ndarray + Action array. + reward : float + Reward value. + next_observation : np.ndarray + Next observation array. + done : bool + Whether the episode is done. + use_local_current : float + Confidence function for local expert for the current action. + use_local_next : float + Confidence function for local expert for the next action. + expert_actions : np.ndarray + Expert actions array. + next_expert_actions : np.ndarray + Next expert actions array. 
+ + Returns: + ---------- + None + """ + if not self._initialized: + self._init_storage(observation.size, action.size) + + self._observations[self._next_idx, :] = np.array(observation, dtype=np.float32) + self._next_observations[self._next_idx, :] = np.array(next_observation, dtype=np.float32) + self._actions[self._next_idx, :] = np.array(action, dtype=np.float32) + self._rewards[self._next_idx] = reward + self._dones[self._next_idx] = float(done) + + # use locals + self._use_local_current[self._next_idx] = float(use_local_current) + self._use_local_next[self._next_idx] = float(use_local_next) + + # actions + self._expert_actions[self._next_idx] = np.array(expert_actions, dtype=np.float32) + self._next_expert_actions[self._next_idx] = np.array(next_expert_actions, dtype=np.float32) + + if self._size < self._max_size: + self._size += 1 + self._next_idx = (self._next_idx + 1) % self._max_size + self._total_steps += 1 + + def add_traj(self, observations: np.ndarray, actions: np.ndarray, rewards: np.ndarray, + next_observations: np.ndarray, dones: np.ndarray, + use_local_current: np.ndarray, use_local_next: np.ndarray, + expert_actions: np.ndarray, next_expert_actions: np.ndarray): + """ + Add a trajectory to the replay buffer. + + Parameters: + ---------- + observations : np.ndarray + Array of observations. + actions : np.ndarray + Array of actions. + rewards : np.ndarray + Array of rewards. + next_observations : np.ndarray + Array of next observations. + dones : np.ndarray + Array of done flags. + use_local_current : np.ndarray + Array of flags for using local expert for the current action. + use_local_next : np.ndarray + Array of flags for using local expert for the next action. + expert_actions : np.ndarray + Array of expert actions. + next_expert_actions : np.ndarray + Array of next expert actions. + + Returns: + ---------- + None + """ + for o, a, r, no, d, u_c, u_n, ea, nea in zip(observations, actions, rewards, next_observations, dones, + use_local_current, use_local_next, + expert_actions, next_expert_actions): + self.add_sample(o, a, r, no, d, u_c, u_n, ea, nea) + + def add_batch(self, batch: Dict[str, np.ndarray]): + """ + Add a batch of data to the replay buffer. + + Parameters: + ---------- + batch : Dict[str, np.ndarray] + Dictionary containing arrays of observations, actions, rewards, next observations, + done flags, floats for the confidence function of the local expert for the current action, + floats for the confidence function of the local expert for the next action, expert actions, + and next expert actions. + + Returns: + ---------- + None + """ + self.add_traj( + batch['observations'], + batch['actions'], + batch['rewards'], + batch['next_observations'], + batch['dones'], + batch['use_local_current'], + batch['use_local_next'], + batch['expert_actions'], + batch['next_expert_actions'], + ) + + def sample(self, batch_size: int) -> Dict[str, np.ndarray]: + """ + Sample a batch of data from the replay buffer. + + Parameters: + ---------- + batch_size : int + The number of samples to be drawn. + + Returns: + ---------- + Dict[str, np.ndarray] + Dictionary containing arrays of observations, actions, rewards, next observations, + done flags, flags for using local expert for the current action, flags for using local + expert for the next action, expert actions, and next expert actions. 
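+
+        Example:
+        ----------
+        Minimal illustrative sketch (assumes buffer is a ReplayBuffer that already
+        holds at least 256 transitions added via add_sample or add_batch):
+
+            batch = buffer.sample(batch_size=256)
+            batch['observations'].shape        # (256, observation_dim)
+            batch['use_local_current'].shape   # (256,)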
+ """ + indices = np.random.randint(len(self), size=batch_size) + return self.select(indices) + + def select(self, indices: np.ndarray) -> Dict[str, np.ndarray]: + """ + Select samples from the replay buffer based on the given indices. + + Parameters: + ---------- + indices : np.ndarray + Array of indices to select samples from the replay buffer. + + Returns: + ---------- + Dict[str, np.ndarray] + Dictionary containing arrays of observations, actions, rewards, next observations, + done flags, flags for using local expert for the current action, flags for using local + expert for the next action, expert actions, and next expert actions. + """ + use_local_current = self._use_local_current[indices, ...] + use_local_next = self._use_local_next[indices, ...] + expert_actions = self._expert_actions[indices, ...] + next_expert_actions = self._next_expert_actions[indices, ...] + + return dict( + observations=self._observations[indices, ...], + actions=self._actions[indices, ...], + rewards=self._rewards[indices, ...], + next_observations=self._next_observations[indices, ...], + dones=self._dones[indices, ...], + use_local_current=use_local_current, + use_local_next=use_local_next, + expert_actions=expert_actions, + next_expert_actions=next_expert_actions + ) + + def generator(self, batch_size: int, n_batchs: Optional[int] = None) -> Generator[Dict[str, Any], None, None]: + """ + Generator function that yields batches of samples from the replay buffer. + + Parameters: + ---------- + batch_size : int + Size of each batch. + n_batchs : int, optional + Number of batches to generate (default is None for an infinite generator). + + Yields: + ---------- + Dict[str, Any] + Dictionary containing arrays of observations, actions, rewards, next observations, + done flags, flags for using local expert for the current action, flags for using local + expert for the next action, expert actions, and next expert actions. + """ + i = 0 + while n_batchs is None or i < n_batchs: + yield self.sample(batch_size) + i += 1 + + @property + def total_steps(self) -> int: + """ + Property to get the total number of steps taken by the replay buffer. + + Returns: + ---------- + int + Total number of steps. + """ + return self._total_steps + + @property + def data(self) -> Dict[str, Any]: + """ + Property to get a dictionary containing arrays of observations, actions, rewards, next observations, + done flags, confidence function for using local expert for the current action, + confidence function for using local expert for the next action, expert actions, and next expert actions. + + Returns: + ---------- + Dict[str, Any] + Dictionary containing arrays of observations, actions, rewards, next observations, + done flags, confidence function for using local expert for the current action, + confidence function for using local expert for the next action, expert actions, and next expert actions. 
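+
+        Example:
+        ----------
+        Illustrative sketch (assumes the buffer is non-empty), e.g. to snapshot the
+        stored transitions before saving them to disk:
+
+            snapshot = buffer.data
+            assert len(snapshot['observations']) == len(buffer)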
+ """ + return dict( + observations=self._observations[:self._size, ...], + actions=self._actions[:self._size, ...], + rewards=self._rewards[:self._size, ...], + next_observations=self._next_observations[:self._size, ...], + dones=self._dones[:self._size, ...], + use_local_current=self._use_local_current[:self._size, ...], + use_local_next=self._use_local_next[:self._size, ...], + expert_actions=self._expert_actions[:self._size, ...], + next_expert_actions=self._next_expert_actions[:self._size, ...], + ) + + +def batch_to_torch(batch: Dict[str, Union[np.ndarray, Dict[int, np.ndarray]]], device: str) \ + -> Dict[str, Union[torch.Tensor, Dict[int, torch.Tensor]]]: + """ + Convert a batch from NumPy arrays to PyTorch tensors. + + Parameters: + ---------- + batch : Dict[str, Union[np.ndarray, Dict[int, np.ndarray]]] + Dictionary containing NumPy arrays or dictionaries of NumPy arrays. + device : str + The device to which the tensors should be moved. + + Returns: + ---------- + Dict[str, Union[torch.Tensor, Dict[int, torch.Tensor]]] + Dictionary containing PyTorch tensors or dictionaries of PyTorch tensors. + """ + return { + k: torch.from_numpy(v).to(device=device, non_blocking=True) if type(v) is np.ndarray + else {nb: torch.from_numpy(v[nb]).to(device=device, non_blocking=True) for nb in range(len(v))} + for k, v in batch.items() + } + + +def subsample_batch(batch: Dict[str, np.ndarray], size: int) -> Dict[str, np.ndarray]: + """ + Subsample a batch with the given size. + + Parameters: + ---------- + batch : Dict[str, np.ndarray] + Dictionary containing NumPy arrays. + size : int + The size of the subsampled batch. + + Returns: + ---------- + Dict[str, np.ndarray] + Subsampled batch. + """ + indices = np.random.randint(batch['observations'].shape[0], size=size) + + return dict( + observations=batch['observations'][indices, ...], + actions=batch['actions'][indices, ...], + rewards=batch['rewards'][indices, ...], + next_observations=batch['next_observations'][indices, ...], + dones=batch['dones'][indices, ...], + use_local_current=batch['use_local_current'][indices, ...], + use_local_next=batch['use_local_next'][indices, ...], + expert_actions=batch['expert_actions'][indices, ...], + next_expert_actions=batch['next_expert_actions'][indices, ...], + ) + + +def concatenate_batches(batches: List[Dict[str, np.ndarray]]) -> Dict[str, np.ndarray]: + """ + Concatenate multiple batches into a single batch. + + Parameters: + ---------- + batches : List[Dict[str, np.ndarray]] + List of dictionaries, each containing NumPy arrays. + + Returns: + ---------- + Dict[str, np.ndarray] + Concatenated batch. 
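+
+    Example:
+    ----------
+    Illustrative sketch (assumes batch_a and batch_b are two batch dictionaries
+    with matching keys and array dimensions):
+
+        merged = concatenate_batches([batch_a, batch_b])
+        # merged['rewards'] now stacks batch_a and batch_b rewards along axis 0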
+ """ + return dict( + observations=np.concatenate([batch['observations'] for batch in batches], axis=0).astype(np.float32), + actions=np.concatenate([batch['actions'] for batch in batches], axis=0).astype(np.float32), + rewards=np.concatenate([batch['rewards'] for batch in batches], axis=0).astype(np.float32), + next_observations=np.concatenate([batch['next_observations'] for batch in batches], axis=0).astype(np.float32), + dones=np.concatenate([batch['dones'] for batch in batches], axis=0).astype(np.float32), + use_locals_current=np.concatenate([batch['use_locals_current'] for batch in batches], axis=0).astype(np.float32), + use_locals_next=np.concatenate([batch['use_locals_next'] for batch in batches], axis=0).astype(np.float32), + expert_actions=np.concatenate([batch['expert_actions'] for batch in batches], axis=0).astype(np.float32), + next_expert_actions=np.concatenate([batch['next_expert_actions'] for batch in batches], axis=0).astype(np.float32), + ) diff --git a/RLLG/agents/common/sampler.py b/RLLG/agents/common/sampler.py index 153cccb1..4af0d65a 100644 --- a/RLLG/agents/common/sampler.py +++ b/RLLG/agents/common/sampler.py @@ -1,193 +1,273 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - - -import numpy as np - - -class StepSampler(object): - - def __init__(self, - env, - max_traj_length=1000): - self.max_traj_length = max_traj_length - self._env = env - self._traj_steps = 0 - self._current_observation = self.env.reset() - - def sample(self, agent, n_steps, deterministic=False, replay_buffer=None): - # general observations - observations = [] - actions = [] - rewards = [] - next_observations = [] - dones = [] - list_use_local_current = [] - list_use_local_next = [] - failures = [] - - for n_ in range(n_steps): - - self._traj_steps += 1 - observation = self._current_observation - - # get action and local information - if n_ == 0: - action, use_local_current, expert_action = agent.get_action(self.env, - observation, - deterministic=deterministic, - add_local_information=True) - else: - expert_action = next_expert_action.copy() - use_local_current = use_local_next - action = next_action.copy() - - # Apply next action and save transition - next_observation, reward, done, info = self.env.step(action) - observations.append(observation) - actions.append(action) - rewards.append(reward) - dones.append(done) - next_observations.append(next_observation) - if reward <= -500: - failures.append(1) - else: - failures.append(0) - - # Choose action according to local policies to record for both obs and next_obs - next_action, use_local_next, next_expert_action = agent.get_action(self.env, - next_observation, - deterministic=deterministic, - add_local_information=True) - - # add local information - list_use_local_current.append(use_local_current) - list_use_local_next.append(use_local_next) - - if replay_buffer is not None: - replay_buffer.add_sample( - observation, - action, - reward, - next_observation, - done, - use_local_current, - use_local_next, - expert_action, - next_expert_action - ) - - self._current_observation = next_observation - - if done or self._traj_steps >= self.max_traj_length: - self._current_observation = self.env.reset() - self._traj_steps = 0 - - metrics_to_return = dict( - observations=np.array(observations, dtype=np.float32), - actions=np.array(actions, 
dtype=np.float32), - rewards=np.array(rewards, dtype=np.float32), - next_observations=np.array(next_observations, dtype=np.float32), - dones=np.array(dones, dtype=np.float32), - list_use_local_current=np.array(list_use_local_current, dtype=np.float32), - list_use_local_next=np.array(list_use_local_next, dtype=np.float32), - failures=np.array(failures, dtype=np.float32), - ) - - return metrics_to_return - - @property - def env(self): - return self._env - - -class TrajSampler(object): - - def __init__(self, - env, - max_traj_length=1000): - self.max_traj_length = max_traj_length - self._env = env - - def sample(self, agent, n_trajs, deterministic=False, replay_buffer=None, replay_buffer_success=None): - - trajs = [] - - for _ in range(n_trajs): - observations = [] - actions = [] - rewards = [] - next_observations = [] - dones = [] - failures = [] - list_use_local_current = [] - list_use_local_next = [] - - observation = self.env.reset() - - for n_ in range(self.max_traj_length): - - # get action and local information - if n_ == 0: - action, use_local_current, expert_action = agent.get_action(self.env, - observation, - deterministic=deterministic, - add_local_information=True) - else: - expert_action = next_expert_action.copy() - use_local_current = use_local_next - action = next_action.copy() - - # Apply next action and save transition - next_observation, reward, done, info = self.env.step(action) - observations.append(observation) - actions.append(action) - rewards.append(reward) - dones.append(done) - next_observations.append(next_observation) - if reward <= -500: - failures.append(1) - else: - failures.append(0) - - # Choose action according to local policies to record for both obs and next_obs - next_action, use_local_next, next_expert_action = agent.get_action(self.env, - next_observation, - deterministic=deterministic, - add_local_information=True) - - # add local information - list_use_local_current.append(use_local_current) - list_use_local_next.append(use_local_next) - - observation = next_observation - - if done: - break - - metrics_to_return = dict( - observations=np.array(observations, dtype=np.float32), - actions=np.array(actions, dtype=np.float32), - rewards=np.array(rewards, dtype=np.float32), - next_observations=np.array(next_observations, dtype=np.float32), - dones=np.array(dones, dtype=np.float32), - list_use_local_current=np.array(list_use_local_current, dtype=np.float32), - list_use_local_next=np.array(list_use_local_next, dtype=np.float32), - failures=np.array(failures, dtype=np.float32), - ) - - trajs.append(metrics_to_return) - - return trajs - - @property - def env(self): - return self._env +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 Xinyang Geng. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +from typing import Any, Optional, Dict +from agents.common.replay_buffer import ReplayBuffer +import numpy as np + + +class StepSampler(object): + """ + StepSampler for collecting time-steps from an environment. + + Parameters: + ---------- + env : Any + The environment. + max_traj_length : int, optional + Maximum length of a trajectory (default is 1000). 
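+
+    Example:
+    ----------
+    Illustrative sketch (assumes env, agent and buffer are an already constructed
+    environment, agent and ReplayBuffer, as used in the training loop):
+
+        sampler = StepSampler(env, max_traj_length=1000)
+        metrics = sampler.sample(agent, n_steps=1000, replay_buffer=buffer)
+        metrics['rewards'].mean()   # average reward over the collected steps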
+    """
+
+    def __init__(self,
+                 env: Any,
+                 max_traj_length: Optional[int] = 1000):
+        self.max_traj_length = max_traj_length
+        self._env = env
+        self._traj_steps = 0
+        self._current_observation = self.env.reset()
+
+    def sample(self, agent: Any, n_steps: int,
+               deterministic: Optional[bool] = False,
+               replay_buffer: Optional[ReplayBuffer] = None) -> Dict:
+        """
+        Collect time-steps from the environment using the provided agent.
+
+        Parameters:
+        ----------
+        agent : Any
+            The agent used to interact with the environment.
+        n_steps : int
+            Number of steps to collect.
+        deterministic : bool, optional
+            Whether to use deterministic actions (default is False).
+        replay_buffer : ReplayBuffer, optional
+            The replay buffer to store the collected samples (default is None).
+
+        Returns:
+        ----------
+        Dict
+            Dictionary containing the collected time-step information.
+        """
+        # general observations
+        observations = []
+        actions = []
+        rewards = []
+        next_observations = []
+        dones = []
+        list_use_local_current = []
+        list_use_local_next = []
+        failures = []
+
+        for n_ in range(n_steps):
+
+            self._traj_steps += 1
+            observation = self._current_observation
+
+            # get action and local information
+            if n_ == 0:
+                action, use_local_current, expert_action = agent.get_action(self.env,
+                                                                             observation,
+                                                                             deterministic=deterministic,
+                                                                             add_local_information=True)
+            else:
+                expert_action = next_expert_action.copy()
+                use_local_current = use_local_next
+                action = next_action.copy()
+
+            # Apply next action and save transition
+            next_observation, reward, done, info = self.env.step(action)
+            observations.append(observation)
+            actions.append(action)
+            rewards.append(reward)
+            dones.append(done)
+            next_observations.append(next_observation)
+            if reward <= -500:
+                failures.append(1)
+            else:
+                failures.append(0)
+
+            # Choose action according to local policies to record for both obs and next_obs
+            next_action, use_local_next, next_expert_action = agent.get_action(self.env,
+                                                                               next_observation,
+                                                                               deterministic=deterministic,
+                                                                               add_local_information=True)
+
+            # add local information
+            list_use_local_current.append(use_local_current)
+            list_use_local_next.append(use_local_next)
+
+            if replay_buffer is not None:
+                replay_buffer.add_sample(
+                    observation,
+                    action,
+                    reward,
+                    next_observation,
+                    done,
+                    use_local_current,
+                    use_local_next,
+                    expert_action,
+                    next_expert_action
+                )
+
+            self._current_observation = next_observation
+
+            if done or self._traj_steps >= self.max_traj_length:
+                self._current_observation = self.env.reset()
+                self._traj_steps = 0
+
+        metrics_to_return = dict(
+            observations=np.array(observations, dtype=np.float32),
+            actions=np.array(actions, dtype=np.float32),
+            rewards=np.array(rewards, dtype=np.float32),
+            next_observations=np.array(next_observations, dtype=np.float32),
+            dones=np.array(dones, dtype=np.float32),
+            list_use_local_current=np.array(list_use_local_current, dtype=np.float32),
+            list_use_local_next=np.array(list_use_local_next, dtype=np.float32),
+            failures=np.array(failures, dtype=np.float32),
+        )
+
+        return metrics_to_return
+
+    @property
+    def env(self):
+        """
+        Get the environment associated with the StepSampler.
+
+        Returns:
+        ----------
+        gym.Env
+            The environment.
+        """
+        return self._env
+
+
+class TrajSampler(object):
+    """
+    TrajSampler for collecting full trajectories from an environment.
+
+    Parameters:
+    ----------
+    env : Any
+        The environment.
+    max_traj_length : int, optional
+        Maximum length of a trajectory (default is 1000).
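+
+    Example:
+    ----------
+    Illustrative sketch (assumes env and agent are an already constructed
+    evaluation environment and agent):
+
+        eval_sampler = TrajSampler(env, max_traj_length=1000)
+        trajs = eval_sampler.sample(agent, n_trajs=5, deterministic=True)
+        returns = [traj['rewards'].sum() for traj in trajs]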
+ """ + + def __init__(self, + env: Any, + max_traj_length : Optional[int] = 1000) -> None: + self.max_traj_length = max_traj_length + self._env = env + + def sample(self, agent: Any, n_trajs: int, deterministic: Optional[bool] = False, + replay_buffer: Optional[ReplayBuffer] = None, replay_buffer_success: Optional[ReplayBuffer] = None): + """ + Sample trajectories using the provided agent. + + Parameters: + ---------- + agent : Any + The agent used to sample trajectories. + n_trajs : int + Number of trajectories to sample. + deterministic : bool, optional + Whether to use deterministic actions (default is False). + replay_buffer : ReplayBuffer, optional + If provided, add samples to the replay buffer. + replay_buffer_success : ReplayBuffer, optional + If provided, add successful samples to this replay buffer. + + Returns: + ---------- + List[Dict] + List of dictionaries containing trajectory information. + """ + + trajs = [] + + for _ in range(n_trajs): + observations = [] + actions = [] + rewards = [] + next_observations = [] + dones = [] + failures = [] + list_use_local_current = [] + list_use_local_next = [] + + observation = self.env.reset() + + for n_ in range(self.max_traj_length): + + # get action and local information + if n_ == 0: + action, use_local_current, expert_action = agent.get_action(self.env, + observation, + deterministic=deterministic, + add_local_information=True) + else: + expert_action = next_expert_action.copy() + use_local_current = use_local_next + action = next_action.copy() + + # Apply next action and save transition + next_observation, reward, done, info = self.env.step(action) + observations.append(observation) + actions.append(action) + rewards.append(reward) + dones.append(done) + next_observations.append(next_observation) + if reward <= -500: + failures.append(1) + else: + failures.append(0) + + # Choose action according to local policies to record for both obs and next_obs + next_action, use_local_next, next_expert_action = agent.get_action(self.env, + next_observation, + deterministic=deterministic, + add_local_information=True) + + # add local information + list_use_local_current.append(use_local_current) + list_use_local_next.append(use_local_next) + + observation = next_observation + + if done: + break + + metrics_to_return = dict( + observations=np.array(observations, dtype=np.float32), + actions=np.array(actions, dtype=np.float32), + rewards=np.array(rewards, dtype=np.float32), + next_observations=np.array(next_observations, dtype=np.float32), + dones=np.array(dones, dtype=np.float32), + list_use_local_current=np.array(list_use_local_current, dtype=np.float32), + list_use_local_next=np.array(list_use_local_next, dtype=np.float32), + failures=np.array(failures, dtype=np.float32), + ) + + trajs.append(metrics_to_return) + + return trajs + + @property + def env(self): + """ + Get the environment associated with the StepSampler. + + Returns: + ---------- + gym.Env + The environment. + """ + return self._env diff --git a/RLLG/agents/common/utils.py b/RLLG/agents/common/utils.py index 4dbcb54d..3e2aa949 100644 --- a/RLLG/agents/common/utils.py +++ b/RLLG/agents/common/utils.py @@ -1,70 +1,117 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 Xinyang Geng. - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. 
-
-
-import random
-import time
-import numpy as np
-import torch
-
-
-class Timer(object):
-
-    def __init__(self):
-        self._time = None
-
-    def __enter__(self):
-        self._start_time = time.time()
-        return self
-
-    def __exit__(self, exc_type, exc_value, exc_tb):
-        self._time = time.time() - self._start_time
-
-    def __call__(self):
-        return self._time
-
-
-def set_random_seed(seed):
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.cuda.manual_seed(seed)
-    np.random.seed(seed)
-    random.seed(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-
-def prefix_metrics(metrics, prefix):
-    return {
-        '{}/{}'.format(prefix, key): value for key, value in metrics.items()
-    }
-
-
-def get_global_name(name):
-    """
-    In case one modifies the environment.
-    """
-    if 'cartpole' in name:
-        glob_name = 'cartpole'
-    elif 'point_mass' in name:
-        glob_name = 'point_mass'
-    elif 'hirl_point_fall' in name:
-        glob_name = 'hirl_point_fall'
-    else:
-        glob_name = name
-    return glob_name
-
-
-def get_global_agent_name(agent_name):
-    """
-    For variations of the same agent (for example, Naive or not).
-    """
-    glob_name = agent_name
-    return glob_name
+# 2023.02.14-Changed for RLLG
+# Huawei Technologies Co., Ltd.
+
+# Copyright (c) 2020 Xinyang Geng.
+
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Dict, Any
+import random
+import time
+import numpy as np
+import torch
+
+
+class Timer(object):
+    """
+    A simple timer class to measure the execution time of a code block using the "with" statement.
+    """
+
+    def __init__(self):
+        self._time = None
+
+    def __enter__(self):
+        self._start_time = time.time()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self._time = time.time() - self._start_time
+
+    def __call__(self):
+        return self._time
+
+
+def set_random_seed(seed: int):
+    """
+    Set the random seed for reproducibility.
+
+    Parameters:
+    -----------
+    seed : int
+        The desired random seed.
+    """
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.cuda.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def prefix_metrics(metrics: Dict[str, Any], prefix: str):
+    """
+    Prefix the keys of a dictionary of metrics.
+
+    Parameters:
+    -----------
+    metrics : dict
+        The dictionary of metrics.
+    prefix : str
+        The prefix to add to each key.
+
+    Returns:
+    --------
+    dict
+        The new dictionary with prefixed keys.
+    """
+    return {
+        '{}/{}'.format(prefix, key): value for key, value in metrics.items()
+    }
+
+
+def get_global_name(name: str) -> str:
+    """
+    Get the global environment name, in case one modifies the environment.
+
+    Parameters:
+    -----------
+    name : str
+        The name of the environment.
+
+    Returns:
+    --------
+    glob_name : str
+        The global name of the environment.
+    """
+    if 'cartpole' in name:
+        glob_name = 'cartpole'
+    elif 'point_mass' in name:
+        glob_name = 'point_mass'
+    elif 'hirl_point_fall' in name:
+        glob_name = 'hirl_point_fall'
+    else:
+        glob_name = name
+    return glob_name
+
+
+def get_global_agent_name(agent_name: str) -> str:
+    """
+    Get the global agent name, for variations of the same agent (for example, Naive or not).
+
+    Parameters:
+    -----------
+    agent_name : str
+        The name of the agent.
+
+    Returns:
+    --------
+    glob_name : str
+        The global name of the agent.
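+
+    Example:
+    --------
+    The agent name is currently returned unchanged (illustrative):
+
+        get_global_agent_name('SAC')   # 'SAC'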
+ """ + glob_name = agent_name + return glob_name diff --git a/RLLG/agents/common/visualization_helpers.py b/RLLG/agents/common/visualization_helpers.py index 44cbcb35..8b2912cb 100644 --- a/RLLG/agents/common/visualization_helpers.py +++ b/RLLG/agents/common/visualization_helpers.py @@ -1,130 +1,157 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import os -import glob -import pandas as pd -import matplotlib.pyplot as plt -from ray.tune import ExperimentAnalysis - - -def plot_curves(analysis, hps, metric, to_plot="final", label="SAC", n_epochs=2000): - """ - analysis: - tune.ray.ExperimentAnalysis - hps: hyperparams to choose - list - metric: - str - to_plot: to plot best final mean or best overall - str: choose between final and overall - """ - group_by = [f'config/{hp}' for hp in hps if hp != 'repeat_run'] + ['epoch'] - dfs = analysis.trial_dataframes - conf = analysis.get_all_configs() - path = os.path.dirname(list(conf.keys())[0]) - conf = {k: {f'config/{_k}': _v for _k, _v in v.items()} for k, v in conf.items()} - df = pd.concat([dfs[k].assign(**conf[k]) for k in dfs.keys()]) - group = df.groupby(group_by) - mean = group.mean() - std = group.std() - - # if overall or final - if to_plot == "overall": - plot_max_idx = mean[metric].idxmax() - best_dict = {'mean': mean.loc[plot_max_idx], 'std': std.loc[plot_max_idx]} - else: - final_mean = mean.xs(n_epochs - 1, axis=0, level=len(group_by) - 1, drop_level=False) - final_std = std.xs(n_epochs - 1, axis=0, level=len(group_by) - 1, drop_level=False) - plot_max_idx = final_mean[metric].idxmax() - best_dict = {'mean': final_mean.loc[plot_max_idx], 'std': final_std.loc[plot_max_idx]} - - # plot it - idx_but_one = plot_max_idx[:-1] - plot_mean = mean.loc[(idx_but_one)][metric] - plot_std = std.loc[(idx_but_one)][metric] - - plt.plot(plot_mean, label=label) - plt.fill_between(plot_mean.index, - plot_mean - plot_std, - plot_mean + plot_std, - alpha=0.2) - - -def plot_all(env, - agents, - experts, - lambda_s_choices, - init_path="..", - hps=['lambda_s_eps'], - metric="mean_avg_return", - mode="max", - to_plot="final", - n_epochs=2000): - """ - env: - str - agents: - list of str - init_path: - str - hps: hyperparams to choose - list - metric: - str - mode: - str - to_plot: to plot best final mean or best overall - str: choose between final and overall - n_epochs: - int - """ - assert to_plot in ["overall", "final"] - - plt.figure(figsize=(8, 6)) - for agent in agents: - - experts_copy = experts.copy() - if agent == "SAC": - experts_copy = [experts_copy[0]] - - for expert in experts_copy: - - lambda_s_choices_copy = lambda_s_choices.copy() - if agent == 'SAC' or agent == 'SwitchedSAC': - lambda_s_choices_copy = [lambda_s_choices_copy[0]] - - for type_lambda_s in lambda_s_choices_copy: - - # get analysis - if agent == "SAC": - path = os.path.join(init_path, "ray_results", env, agent) - label = agent - elif agent == "SwitchedSAC": - path = os.path.join(init_path, "ray_results", env, agent, expert) - label = f"{agent}-{expert}" - else: - path = os.path.join(init_path, "ray_results", env, agent, expert, type_lambda_s) - label = f"{agent}-{expert}-{type_lambda_s}" - - main = 
sorted(glob.glob(f"{path}/*"), key=os.path.getmtime)[-1].split('/')[-1] - experiment_checkpoint_path = os.path.join(path, main) - analysis = ExperimentAnalysis(experiment_checkpoint_path, default_metric=metric, default_mode=mode) - - # plot one curve - plot_curves(analysis, - hps, - metric, - to_plot=to_plot, - label=label, - n_epochs=n_epochs) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + + +import os +import glob +import pandas as pd +import matplotlib.pyplot as plt +from ray.tune import ExperimentAnalysis +from typing import List, Union, Optional, Tuple + + +def plot_curves(analysis: ExperimentAnalysis, hps: List[str], metric: str, to_plot: Optional[str] = "final", + label: Optional[str] = "SAC", n_epochs: Optional[int] = 2000) -> None: + """ + Plots curves for the specified hyperparameters and metric. + + Parameters: + ----------- + analysis : tune.ray.ExperimentAnalysis + Analysis object from Ray Tune. + hps : List[str] + List of hyperparameters to choose. + metric : str + Metric to plot. + to_plot : str, optional + Choose between "final" and "overall" for best final mean or best overall. + Defaults to "final". + label : str, optional + Label for the plotted curve. Defaults to "SAC". + n_epochs : int, optional + Number of epochs. Defaults to 2000. + + Returns: + ----------- + None + """ + group_by = [f'config/{hp}' for hp in hps if hp != 'repeat_run'] + ['epoch'] + dfs = analysis.trial_dataframes + conf = analysis.get_all_configs() + path = os.path.dirname(list(conf.keys())[0]) + conf = {k: {f'config/{_k}': _v for _k, _v in v.items()} for k, v in conf.items()} + df = pd.concat([dfs[k].assign(**conf[k]) for k in dfs.keys()]) + group = df.groupby(group_by) + mean = group.mean() + std = group.std() + + # if overall or final + if to_plot == "overall": + plot_max_idx = mean[metric].idxmax() + best_dict = {'mean': mean.loc[plot_max_idx], 'std': std.loc[plot_max_idx]} + else: + final_mean = mean.xs(n_epochs - 1, axis=0, level=len(group_by) - 1, drop_level=False) + final_std = std.xs(n_epochs - 1, axis=0, level=len(group_by) - 1, drop_level=False) + plot_max_idx = final_mean[metric].idxmax() + best_dict = {'mean': final_mean.loc[plot_max_idx], 'std': final_std.loc[plot_max_idx]} + + # plot it + idx_but_one = plot_max_idx[:-1] + plot_mean = mean.loc[(idx_but_one)][metric] + plot_std = std.loc[(idx_but_one)][metric] + + plt.plot(plot_mean, label=label) + plt.fill_between(plot_mean.index, + plot_mean - plot_std, + plot_mean + plot_std, + alpha=0.2) + + +def plot_all(env: str, + agents: List[str], + experts: List[str], + lambda_s_choices: List[str], + init_path: Optional[str] = "..", + hps: Optional[List[str]] = ['lambda_s_eps'], + metric: Optional[str] = "mean_avg_return", + mode: Optional[str] = "max", + to_plot: Optional[str] = "final", + n_epochs: Optional[int] = 2000) -> None: + """ + Plots curves for different agents, experts, and lambda_s choices. + + Parameters: + ----------- + env : str + The environment name. + agents : List[str] + List of agent names. + experts : List[str] + List of expert names. + lambda_s_choices : List[str] + List of lambda_s choices. + init_path : str, optional + The initialization path. 
Defaults to "..". + hps : List[str], optional + List of hyperparameters to choose. Defaults to ['lambda_s_eps']. + metric : str, optional + Metric to plot. Defaults to "mean_avg_return". + mode : str, optional + Mode for metric comparison. Defaults to "max". + to_plot : str, optional + Choose between "overall" and "final" for best overall or best final mean. Defaults to "final". + n_epochs : int, optional + Number of epochs. Defaults to 2000. + + Returns: + ----------- + None + """ + assert to_plot in ["overall", "final"] + + plt.figure(figsize=(8, 6)) + for agent in agents: + + experts_copy = experts.copy() + if agent == "SAC": + experts_copy = [experts_copy[0]] + + for expert in experts_copy: + + lambda_s_choices_copy = lambda_s_choices.copy() + if agent == 'SAC' or agent == 'SwitchedSAC': + lambda_s_choices_copy = [lambda_s_choices_copy[0]] + + for type_lambda_s in lambda_s_choices_copy: + + # get analysis + if agent == "SAC": + path = os.path.join(init_path, "ray_results", env, agent) + label = agent + elif agent == "SwitchedSAC": + path = os.path.join(init_path, "ray_results", env, agent, expert) + label = f"{agent}-{expert}" + else: + path = os.path.join(init_path, "ray_results", env, agent, expert, type_lambda_s) + label = f"{agent}-{expert}-{type_lambda_s}" + + main = sorted(glob.glob(f"{path}/*"), key=os.path.getmtime)[-1].split('/')[-1] + experiment_checkpoint_path = os.path.join(path, main) + analysis = ExperimentAnalysis(experiment_checkpoint_path, default_metric=metric, default_mode=mode) + + # plot one curve + plot_curves(analysis, + hps, + metric, + to_plot=to_plot, + label=label, + n_epochs=n_epochs) plt.legend() \ No newline at end of file diff --git a/RLLG/docker/Dockerfile b/RLLG/docker/Dockerfile new file mode 100644 index 00000000..00f8cdc5 --- /dev/null +++ b/RLLG/docker/Dockerfile @@ -0,0 +1,29 @@ +# Use an official Python runtime as a parent image +FROM continuumio/miniconda3:4.10.3 + +# Set the working directory to /app +WORKDIR /app + +# Copy the current directory contents into the container at /app +COPY . /app + +# Create a new Conda environment +RUN conda create --name rllg python=3.8 + +# Activate the Conda environment +SHELL ["conda", "run", "-n", "rllg", "/bin/bash", "-c"] + +# Install the package and its dependencies using setup.py +RUN pip install -e . + +# Install gym +RUN pip install gym==0.21.0 + +# Make port 80 available to the world outside this container +EXPOSE 80 + +# Define environment variable +ENV NAME rllg + +# Run main.py when the container launches +# CMD ["conda", "run", "-n", "rllg", "python", "main.py"] diff --git a/RLLG/envs/ball_in_cup/confidence.py b/RLLG/envs/ball_in_cup/confidence.py index da13bef7..58f4ce8b 100644 --- a/RLLG/envs/ball_in_cup/confidence.py +++ b/RLLG/envs/ball_in_cup/confidence.py @@ -1,37 +1,84 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. 
-
-
-
-
-class LambdaS:
-
-    def __init__(self, pos_tol=None, speed_tol=None):
-        self.pos_tol = pos_tol
-        self.speed_tol = speed_tol
-
-
-    def get_use_local(self, env, observation):
-        # check if ball above cup or not, and check if it is inside the cup
-        cup_x, cup_z, ball_x, ball_z = observation[0], observation[1], observation[2], observation[3]
-        # below cup
-        if ball_z <= cup_z + 0.3:
-            return 1
-        # not inside cup when above cup
-        if 0.3 + cup_z <= ball_z <= cup_z + 0.35:
-            if ball_x > cup_z + 0.05 or ball_x < cup_z - 0.05:
-                return 1
-        return 0
-
-def ball_in_cup_lambda_s(expert,
-                         device="cpu",
-                         pos_tol=None,
-                         speed_tol=None,
-                         smoothed=None):
-    return LambdaS()
+# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
+
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the MIT license.
+
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the MIT License for more details.
+
+from typing import Any, List, Optional
+
+
+class LambdaS:
+    """
+    Class representing the confidence function.
+
+    Parameters:
+    ----------
+    pos_tol : float or None, optional
+        Position tolerance (default is None).
+    speed_tol : float or None, optional
+        Speed tolerance (default is None).
+    """
+
+    def __init__(self, pos_tol: Optional[float] = None, speed_tol: Optional[float] = None):
+        self.pos_tol = pos_tol
+        self.speed_tol = speed_tol
+
+
+    def get_use_local(self, env: Any, observation: List) -> float:
+        """
+        Get the lambda_s value based on the environment and observation.
+
+        Parameters:
+        ----------
+        env : Any
+            The environment.
+        observation : list or np.ndarray
+            The observation.
+
+        Returns:
+        ----------
+        float
+            Use_local value (0 or 1).
+        """
+        # check if ball above cup or not, and check if it is inside the cup
+        cup_x, cup_z, ball_x, ball_z = observation[0], observation[1], observation[2], observation[3]
+        # below cup
+        if ball_z <= cup_z + 0.3:
+            return 1
+        # not inside cup when above cup
+        if 0.3 + cup_z <= ball_z <= cup_z + 0.35:
+            if ball_x > cup_z + 0.05 or ball_x < cup_z - 0.05:
+                return 1
+        return 0
+
+
+def ball_in_cup_lambda_s(expert: Any,
+                         device: str = "cpu",
+                         pos_tol: Optional[float] = None,
+                         speed_tol: Optional[float] = None,
+                         smoothed: Optional[bool] = None) -> LambdaS:
+    """
+    Returns the confidence LambdaS instance for the ball-in-cup environment.
+
+    Parameters:
+    ----------
+    expert : Any
+        Expert (not used, but here in case the lambda_s depends on the expert).
+    device : str, optional
+        Device for computation (default is 'cpu').
+    pos_tol : float or None, optional
+        Position tolerance (default is None).
+    speed_tol : float or None, optional
+        Speed tolerance (default is None).
+    smoothed : bool or None, optional
+        Whether to use the smoothed lambda_s (default is None).
+
+    Returns:
+    ----------
+    LambdaS
+        The LambdaS instance.
+    """
+    return LambdaS()
diff --git a/RLLG/envs/ball_in_cup/create_ball_in_cup.py b/RLLG/envs/ball_in_cup/create_ball_in_cup.py
index 17ed5a03..3f975510 100644
--- a/RLLG/envs/ball_in_cup/create_ball_in_cup.py
+++ b/RLLG/envs/ball_in_cup/create_ball_in_cup.py
@@ -1,55 +1,77 @@
-# 2023.02.14-Changed for RLLG
-# Huawei Technologies Co., Ltd.
-
-# Copyright (c) Deepmind dm-control.
-
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
- - - -import dmc2gym -from envs.ball_in_cup.local_expert_policy import SACExpert -import os -from types import MethodType - - -def create_ball_in_cup_and_control(orig_cwd='./', - device="cpu"): - # create env - env = dmc2gym.make('ball_in_cup', 'catch') - - # modify initialization - def new_initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. - Args: - physics: An instance of `Physics`. - """ - # Find a collision-free random initial position of the ball. - penetrating = True - while penetrating: - # Assign a random ball position. - physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2) - physics.named.data.qpos['ball_z'] = self.random.uniform(.0, .25) - # Check for collisions. - physics.after_reset() - penetrating = physics.data.ncon > 0 - self.after_step(physics) - - try: - env.env._env._task.initialize_episode = MethodType(new_initialize_episode, env.env._env._task) - except AttributeError: - env.env.env._task.initialize_episode = MethodType(new_initialize_episode, env.env.env._task) - - path = os.path.join(orig_cwd, 'envs', 'ball_in_cup', "models") - - control_dict = { - "MediumSAC": { - "coord": None, - "local_expert": SACExpert(env, path, device) - }, - } - - return env, control_dict \ No newline at end of file +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) Deepmind dm-control. + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + + +import dmc2gym +from envs.ball_in_cup.local_expert_policy import SACExpert +import os +from types import MethodType +from typing import Any, Tuple, Dict + + +def create_ball_in_cup_and_control(orig_cwd: str = './', + device: str = "cpu") -> Tuple[Any, Dict]: + """ + Create the ball in cup environment and its control (local expert) dictionary. + + Parameters: + ---------- + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device (default is 'cpu') + + Returns: + ---------- + Any + The ball in cup environment. + dict + The control dictionary + """ + # create env + env = dmc2gym.make('ball_in_cup', 'catch') + + # modify initialization + def new_initialize_episode(self, physics: Any) -> None: + """ + Sets the state of the environment at the start of each episode. + + Parameters: + ---------- + physics: Any + An instance of `Physics` + """ + # Find a collision-free random initial position of the ball. + penetrating = True + while penetrating: + # Assign a random ball position. + physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2) + physics.named.data.qpos['ball_z'] = self.random.uniform(.0, .25) + # Check for collisions. 
+ physics.after_reset() + penetrating = physics.data.ncon > 0 + self.after_step(physics) + + try: + env.env._env._task.initialize_episode = MethodType(new_initialize_episode, env.env._env._task) + except AttributeError: + env.env.env._task.initialize_episode = MethodType(new_initialize_episode, env.env.env._task) + + path = os.path.join(orig_cwd, 'envs', 'ball_in_cup', "models") + + control_dict = { + "MediumSAC": { + "coord": None, + "local_expert": SACExpert(env, path, device) + }, + } + + return env, control_dict diff --git a/RLLG/envs/ball_in_cup/local_expert_policy.py b/RLLG/envs/ball_in_cup/local_expert_policy.py index c0f71ffd..795c11ac 100644 --- a/RLLG/envs/ball_in_cup/local_expert_policy.py +++ b/RLLG/envs/ball_in_cup/local_expert_policy.py @@ -1,46 +1,75 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - - -import numpy as np -import torch -import os - -class SACExpert: - - def __init__(self, env, path, device="cpu"): - - from agents.common.model import TanhGaussianPolicy, SamplerPolicy - # hyper-params - policy_arch = '64-64' - policy_log_std_multiplier = 1.0 - policy_log_std_offset = -1.0 - - # load expert policy - expert_policy = TanhGaussianPolicy( - env.observation_space.shape[0], - env.action_space.shape[0], - policy_arch, - log_std_multiplier=policy_log_std_multiplier, - log_std_offset=policy_log_std_offset, - ) - glob_path = os.path.join(path, 'medium_expert_sac') - expert_policy.load_state_dict(torch.load(glob_path)) - expert_policy.to(device) - self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) - - def get_action(self, observation, init_action=None, env=None): - with torch.no_grad(): - expert_action = self.sampling_expert_policy( - np.expand_dims(observation, 0), deterministic=True - )[0, :] - return np.clip(expert_action, a_min=-0.99, a_max=0.99) # expert_action +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Any, Optional +import numpy as np +import torch +import os + + + +class SACExpert: + """ + Soft Actor-Critic (SAC) Expert. + + Parameters: + ---------- + env : Any + The environment (usually dm control env, could be gym as well or others). + path : str + The path to the model. + device : str, optional + The device to run the expert policy (default is 'cpu'). 
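+
+    Example:
+    ----------
+    Illustrative sketch (assumes env exposes Gym-style observation_space and
+    action_space, and that path contains the saved medium_expert_sac weights):
+
+        expert = SACExpert(env, path, device="cpu")
+        action = expert.get_action(observation)   # clipped to [-0.99, 0.99]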
+ """ + + def __init__(self, env: Any, path: str, device: Optional[str] = "cpu") -> None: + from agents.common.model import TanhGaussianPolicy, SamplerPolicy + # hyper-params + policy_arch = '64-64' + policy_log_std_multiplier = 1.0 + policy_log_std_offset = -1.0 + + # load expert policy + expert_policy = TanhGaussianPolicy( + env.observation_space.shape[0], + env.action_space.shape[0], + policy_arch, + log_std_multiplier=policy_log_std_multiplier, + log_std_offset=policy_log_std_offset, + ) + glob_path = os.path.join(path, 'medium_expert_sac') + expert_policy.load_state_dict(torch.load(glob_path)) + expert_policy.to(device) + self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) + + def get_action(self, observation: np.ndarray, init_action: Optional[np.ndarray] = None, env: Optional[Any] = None) \ + -> np.ndarray: + """ + Get an action from the SAC expert policy. + + Parameters: + ---------- + observation : numpy.ndarray + The observation from the environment. + init_action : Any, optional + Initial action (default is None). + env : gym.Env, optional + The environment (default is None). + + Returns: + ---------- + numpy.ndarray + The clipped expert action. + """ + with torch.no_grad(): + expert_action = self.sampling_expert_policy( + np.expand_dims(observation, 0), deterministic=True + )[0, :] + return np.clip(expert_action, a_min=-0.99, a_max=0.99) diff --git a/RLLG/envs/ball_in_cup/models/near_expert_sac_650 b/RLLG/envs/ball_in_cup/models/near_expert_sac_650 new file mode 100644 index 0000000000000000000000000000000000000000..4b0ee88ec0f8b23d7689ea6e139ee30f024b70db GIT binary patch literal 22735 zcmb5V2{={n_dZPKDJ4@xNQFunE9X4x7)lBmN>PSP8KV=LBr_SB3{jawQX&nC{j9Cf zq|!i|C=Du2k|r8n-|y%BUH||0|GvNXcfIGj_O-9G*E;vU_w(#$?X}jvkE5-Suz-NL zxWND8GeJO3z}L2iME2te7S!hCq?q*o%ss? 
[GIT binary patch data omitted: the two new binary files
 RLLG/envs/ball_in_cup/models/near_expert_sac_650 and
 RLLG/envs/ball_in_cup/models/near_expert_sac_780 (near-expert SAC model
 checkpoints) are added here as base85-encoded blobs that are not reproduced.]

diff --git a/RLLG/envs/bullet_small_reach/bullet_small_reach.py b/RLLG/envs/bullet_small_reach/bullet_small_reach.py
index 0f42bc92..142e9caa 100644
--- a/RLLG/envs/bullet_small_reach/bullet_small_reach.py
+++ b/RLLG/envs/bullet_small_reach/bullet_small_reach.py
@@ -1,33 +1,77 @@
-# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
-
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the MIT license.
-
-# This program is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE. See the MIT License for more details.
-
-
-
-
-class BulletBallSmallReach:
-
-    def __init__(self, env):
-        self.env = env
-        self.observation_space = self.env.observation_space
-        self.action_space = self.env.action_space
-
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
-        if 'cost_collisions' in info:
-            if info['cost_collisions'] >= 0.5:
-                # print('catastrophic')
-                reward = -1000
-                done = True
-        return obs, reward, done, info
-
-    def render(self, mode="human"):
-        return self.env.render(mode)
-
-    def reset(self):
-        return self.env.reset()
+# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
+
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the MIT license.
+
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the MIT License for more details.
+
+
+from typing import Any, Tuple, Dict, Optional
+import numpy as np
+
+
+class BulletBallSmallReach:
+    """
+    Wrapper for the Bullet Reach environment to change the constraint function into a bad reward.
+
+    Parameters:
+    ----------
+    env : Any
+        The environment to wrap.
+ """ + + def __init__(self, env: Any) -> None: + self.env = env + self.observation_space = self.env.observation_space + self.action_space = self.env.action_space + + def step(self, action: np.ndarray) -> Tuple[Any, float, bool, Dict]: + """ + Step through the environment dynamics and change reward function. + + Parameters: + ---------- + action : Any + The action to be executed. + + Returns: + ---------- + tuple + Observation, reward, done, and info. + """ + obs, reward, done, info = self.env.step(action) + if 'cost_collisions' in info: + if info['cost_collisions'] >= 0.5: + # print('catastrophic') + reward = -1000 + done = True + return obs, reward, done, info + + def render(self, mode: Optional[str] = "human") -> Any: + """ + Render the environment. + + Parameters: + ---------- + mode : str, optional + Rendering mode (default is "human"). + + Returns: + ---------- + Any + The rendering output. + """ + return self.env.render(mode) + + def reset(self) -> np.ndarray: + """ + Reset the environment. + + Returns: + ---------- + Any + The reset observation. + """ + return self.env.reset() diff --git a/RLLG/envs/bullet_small_reach/confidence.py b/RLLG/envs/bullet_small_reach/confidence.py index 3df7ab9c..d409c4bc 100644 --- a/RLLG/envs/bullet_small_reach/confidence.py +++ b/RLLG/envs/bullet_small_reach/confidence.py @@ -1,36 +1,79 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np - - -class LambdaS: - - def __init__(self, pos_tol=3.): - self.pos_tol = pos_tol - - def get_use_local(self, env, observation): - agent_pos = env.env.env.agent.get_position()[:2] - pos_los = [obstacle.get_position()[:2] for obstacle in env.env.obstacles] - min_distance = np.min(np.linalg.norm(np.vstack(pos_los) - agent_pos, axis=1)) - if abs(min_distance) <= self.pos_tol: - return 1 - return 0 - - -def bullet_small_reach_lambda_s(expert, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=False - ): - return LambdaS() +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable +import numpy as np + + +class LambdaS: + """ + Class representing the confidence function. + + Parameters: + ---------- + pos_tol : float, optional + Position tolerance (default is 3.) + """ + + def __init__(self, pos_tol: float = 3.): + self.pos_tol = pos_tol + + def get_use_local(self, env: Any, observation: List) -> float: + """ + Get the lambda s value based on the environment and observation. + + Parameters: + ---------- + env : Any + The environment + observation : list of array + The observation. + + Returns: + ---------- + float + Use_local value (0 or 1). 
+ """ + agent_pos = env.env.env.agent.get_position()[:2] + pos_los = [obstacle.get_position()[:2] for obstacle in env.env.obstacles] + min_distance = np.min(np.linalg.norm(np.vstack(pos_los) - agent_pos, axis=1)) + if abs(min_distance) <= self.pos_tol: + return 1 + return 0 + + +def bullet_small_reach_lambda_s(expert: Any, + device: str = "cpu", + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = None) -> LambdaS: + """ + Returns the confidence LambdaS instance for the bullet reach environment. + + Parameters: + ---------- + expert : Any + Expert (not used, but here in case the lambda_s depends on the expert). + device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool or None, optional + Whether to use smoothed lambda_s (default is None) + + Returns: + ---------- + LambdaS + The LambdaS instance + """ + return LambdaS() diff --git a/RLLG/envs/bullet_small_reach/create_bullet_small_reach.py b/RLLG/envs/bullet_small_reach/create_bullet_small_reach.py index c1263ede..2faba864 100644 --- a/RLLG/envs/bullet_small_reach/create_bullet_small_reach.py +++ b/RLLG/envs/bullet_small_reach/create_bullet_small_reach.py @@ -1,35 +1,53 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -try: - import gym - import bullet_safety_gym -except ModuleNotFoundError: - pass -from envs.bullet_small_reach.bullet_small_reach import BulletBallSmallReach -from envs.bullet_small_reach.local_expert_policy import SafeScripted - - -def create_bullet_small_reach_and_control(orig_cwd='./', - device="cpu"): - - env = BulletBallSmallReach(gym.make('SafetyBallSmallReach-v0')) - - # create controller - control_dict = { - "SafeScripted": { - "coord": None, - "local_expert": SafeScripted() - }, - } - - return env, control_dict +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + + +try: + import gym + import bullet_safety_gym +except ModuleNotFoundError: + pass +from typing import Any, Tuple, Dict +from envs.bullet_small_reach.bullet_small_reach import BulletBallSmallReach +from envs.bullet_small_reach.local_expert_policy import SafeScripted + + +def create_bullet_small_reach_and_control(orig_cwd: str = './', + device: str = "cpu") -> Tuple[Any, Dict]: + """ + Create the Bullet Small Reach environment and its control dictionary. + + Parameters: + ---------- + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device (default is 'cpu') + + Returns: + ---------- + Any + The Bullet Small Reach environment. + dict + The control dictionary. 
+ """ + + env = BulletBallSmallReach(gym.make('SafetyBallSmallReach-v0')) + + # create controller + control_dict = { + "SafeScripted": { + "coord": None, + "local_expert": SafeScripted() + }, + } + + return env, control_dict diff --git a/RLLG/envs/bullet_small_reach/local_expert_policy.py b/RLLG/envs/bullet_small_reach/local_expert_policy.py index 2ba72da7..7af98c03 100644 --- a/RLLG/envs/bullet_small_reach/local_expert_policy.py +++ b/RLLG/envs/bullet_small_reach/local_expert_policy.py @@ -1,52 +1,100 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np - - -class SafeScripted: - - def __init__(self): - pass - - def get_quarter_position(self, agent, obstacle): - pos_x, pos_y = agent.get_position()[:2] - obstacle_x, obstacle_y = obstacle.get_position()[:2] - if pos_x <= obstacle_x: - if pos_y <= obstacle_y: - return 'below-left' - return 'above-left' - if pos_y <= obstacle_y: - return 'below-right' - return 'above-right' - - def get_closest_obstacle(self, env): - agent_pos = env.env.env.agent.get_position()[:2] - pos_los = [obstacle.get_position()[:2] for obstacle in env.env.obstacles] - return np.argmin(np.linalg.norm(np.vstack(pos_los) - agent_pos, axis=1)) - - def get_action(self, observation, init_action=None, env=None): - - # get closest obstacle - id = self.get_closest_obstacle(env) - - # get quarter for chosen obstacle - quarter = self.get_quarter_position(env.env.env.agent, env.env.obstacles[id]) - - if quarter == 'below-left': - return np.array([ -0.999, -0.999 ]) - elif quarter == 'below-right': - return np.array([ 0.999, -0.999 ]) - elif quarter == 'above-left': - return np.array([ -0.999, 0.999 ]) - elif quarter == 'above-right': - return np.array([ 0.999, 0.999 ]) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Any, Optional +import numpy as np + + +class SafeScripted: + """ + SafeScripted class for scripted control. + """ + + def __init__(self) -> None: + pass + + def get_quarter_position(self, agent: Any, obstacle: Any) -> str: + """ + Get the quarter position. + + Parameters: + ---------- + agent : Any + The agent object. + obstacle : Any + The obstacle object. + + Returns: + ---------- + str + The quarter position ('below-left', 'below-right', 'above-left', 'above-right'). + """ + pos_x, pos_y = agent.get_position()[:2] + obstacle_x, obstacle_y = obstacle.get_position()[:2] + if pos_x <= obstacle_x: + if pos_y <= obstacle_y: + return 'below-left' + return 'above-left' + if pos_y <= obstacle_y: + return 'below-right' + return 'above-right' + + def get_closest_obstacle(self, env: Any) -> int: + """ + Get the index of the closest obstacle. + + Parameters: + ---------- + env : Any + The environment object. + + Returns: + ---------- + int + The index of the closest obstacle. 
+ """ + agent_pos = env.env.env.agent.get_position()[:2] + pos_los = [obstacle.get_position()[:2] for obstacle in env.env.obstacles] + return np.argmin(np.linalg.norm(np.vstack(pos_los) - agent_pos, axis=1)) + + def get_action(self, observation: np.ndarray, init_action: Optional[Any] = None, env: Optional[Any] = None)\ + -> np.ndarray: + """ + Get the action for scripted control. + + Parameters: + ---------- + observation : Any + The observation. + init_action : Any, optional + The initial action (default is None). + env : Any, optional + The environment object (default is None). + + Returns: + ---------- + np.ndarray + The scripted action. + """ + + # get closest obstacle + id = self.get_closest_obstacle(env) + + # get quarter for chosen obstacle + quarter = self.get_quarter_position(env.env.env.agent, env.env.obstacles[id]) + + if quarter == 'below-left': + return np.array([ -0.999, -0.999 ]) + elif quarter == 'below-right': + return np.array([ 0.999, -0.999 ]) + elif quarter == 'above-left': + return np.array([ -0.999, 0.999 ]) + elif quarter == 'above-right': + return np.array([ 0.999, 0.999 ]) diff --git a/RLLG/envs/cartpole/confidence.py b/RLLG/envs/cartpole/confidence.py index cdb4bacf..0f861f5e 100644 --- a/RLLG/envs/cartpole/confidence.py +++ b/RLLG/envs/cartpole/confidence.py @@ -1,46 +1,91 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -from math import exp - - -class LambdaS: - - def __init__(self, - pos_tol=None, - speed_tol=None, - smoothed=False): - self.pos_tol = pos_tol - self.speed_tol = speed_tol - self.smoothed = smoothed - - def get_use_local(self, env, observation): - abs_pos = abs(observation[0]) - if self.smoothed: - if abs_pos < 0.5: - return 0 - elif abs_pos > 1.2: - return 1 - return exp(- 3 * (1.2 - abs_pos)) - else: - if 1.9 - abs_pos < abs(self.pos_tol): - return 1 - return 0 - - -def cartpole_lambda_s(expert, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=False - ): - return LambdaS(pos_tol=pos_tol, speed_tol=speed_tol, smoothed=smoothed) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable +from math import exp + + +class LambdaS: + """ + Class representing the confidence function. + + Parameters: + ---------- + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool, optional + Have a smooth confidence function or not (default is 3.) 
+ """ + + def __init__(self, + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = False): + self.pos_tol = pos_tol + self.speed_tol = speed_tol + self.smoothed = smoothed + + def get_use_local(self, env: Any, observation: List) -> float: + """ + Get the lambda s value based on the environment and observation. + + Parameters: + ---------- + env : Any + The environment + observation : list of array + The observation. + + Returns: + ---------- + float + Use_local value (0 or 1). + """ + abs_pos = abs(observation[0]) + if self.smoothed: + if abs_pos < 0.5: + return 0 + elif abs_pos > 1.2: + return 1 + return exp(- 3 * (1.2 - abs_pos)) + else: + if 1.9 - abs_pos < abs(self.pos_tol): + return 1 + return 0 + + +def cartpole_lambda_s(expert: Any, + device: str = "cpu", + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = None) -> LambdaS: + """ + Returns the confidence LambdaS instance for the cartpole environment. + + Parameters: + ---------- + expert : Any + Expert (not used, but here in case the lambda_s depends on the expert). + device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool or None, optional + Whether to use smoothed lambda_s (default is None) + + Returns: + ---------- + LambdaS + The LambdaS instance + """ + return LambdaS(pos_tol=pos_tol, speed_tol=speed_tol, smoothed=smoothed) diff --git a/RLLG/envs/cartpole/create_cartpole.py b/RLLG/envs/cartpole/create_cartpole.py index e6fd4ba9..6c4b3408 100644 --- a/RLLG/envs/cartpole/create_cartpole.py +++ b/RLLG/envs/cartpole/create_cartpole.py @@ -1,60 +1,114 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2020 dm-control (https://github.com/deepmind/dm_control). - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - -# The initialization and termination function of the environment has been slightly changed from the original one. 
- - -import dmc2gym -from envs.cartpole.local_expert_policy import SafeScripted -import os -from types import MethodType - -def new_get_termination(limit_cart=0.6): - def new_get_termination_fn(self, physics): - pos = physics.named.data.qpos['slider'][0] - if abs(pos) > limit_cart: - return 1 - return new_get_termination_fn - -def new_get_reward(limit_cart=0.6, reward_end=1): - def new_get_reward_fn(self, physics): - """Returns a sparse or a smooth reward, as specified in the constructor.""" - pos = physics.named.data.qpos['slider'][0] - if abs(pos) > limit_cart: - return -reward_end - return self._get_reward(physics, sparse=self._sparse) - return new_get_reward_fn - - -def create_cartpole_and_control(orig_cwd='./', - device="cpu", - task_name="swingup", - limit_cart=0.6, - reward_end=1, - pos_tol=1.): - - # create env - env = dmc2gym.make(domain_name="cartpole", task_name=task_name) - - # change termination and reward function - env.env.task.get_termination = MethodType(new_get_termination(limit_cart=limit_cart), env.env.task) - env.env.task.get_reward = MethodType(new_get_reward(limit_cart=limit_cart, - reward_end=reward_end), env.env.task) - - # create controller - path = os.path.join(orig_cwd, 'envs', 'cartpole', "models") - control_dict = { - "SafeScripted": { - "coord": None, - "local_expert": SafeScripted() - }, - } - - return env, control_dict +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2020 dm-control (https://github.com/deepmind/dm_control). + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# The initialization and termination function of the environment has been slightly changed from the original one. + + +import dmc2gym +from envs.cartpole.local_expert_policy import SafeScripted +import os +from types import MethodType +from typing import Callable, Tuple, Any, Dict + + +def new_get_termination(limit_cart: float = 0.6) -> Callable: + """ + Return a new termination function for the Cartpole environment. + + Parameters: + ---------- + limit_cart : float, optional + Cart position limit (default is 0.6). + + Returns: + ---------- + Callable + New termination function. + """ + def new_get_termination_fn(self, physics): + pos = physics.named.data.qpos['slider'][0] + if abs(pos) > limit_cart: + return 1 + return new_get_termination_fn + + +def new_get_reward(limit_cart: float = 0.6, reward_end: int = 1) -> Callable: + """ + Return a new reward function for the Cartpole environment + + Parameters: + ---------- + limit_cart : float, optional + Cart position limit (default is 0.6) + reward_end : int, optional + Reward value when the limit is reached (default is 1) + + Returns: + ---------- + Callable + New reward function + """ + def new_get_reward_fn(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + pos = physics.named.data.qpos['slider'][0] + if abs(pos) > limit_cart: + return -reward_end + return self._get_reward(physics, sparse=self._sparse) + return new_get_reward_fn + + +def create_cartpole_and_control(orig_cwd: str = './', + device: str = "cpu", + task_name: str = "swingup", + limit_cart: float = 0.6, + reward_end: int = 1, + pos_tol: float = 1.) 
-> Tuple[Any, Dict]: + """ + Create the Cartpole environment and its control dictionary + + Parameters: + ---------- + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device (default is 'cpu') + task_name : str, optional + Task name (default is 'swingup') + limit_cart : float, optional + Cart position limit (default is 0.6) + reward_end : int, optional + Reward value when the limit is reached (default is 1) + pos_tol : float, optional + Position tolerance (default is 1.) + + Returns: + ---------- + Tuple[Any, dict] + The Cartpole environment and the control dictionary + """ + + # create env + env = dmc2gym.make(domain_name="cartpole", task_name=task_name) + + # change termination and reward function + env.env.task.get_termination = MethodType(new_get_termination(limit_cart=limit_cart), env.env.task) + env.env.task.get_reward = MethodType(new_get_reward(limit_cart=limit_cart, + reward_end=reward_end), env.env.task) + + # create controller + path = os.path.join(orig_cwd, 'envs', 'cartpole', "models") + control_dict = { + "SafeScripted": { + "coord": None, + "local_expert": SafeScripted() + }, + } + + return env, control_dict diff --git a/RLLG/envs/cartpole/local_expert_policy.py b/RLLG/envs/cartpole/local_expert_policy.py index 6e8f3d2b..8e49af76 100644 --- a/RLLG/envs/cartpole/local_expert_policy.py +++ b/RLLG/envs/cartpole/local_expert_policy.py @@ -1,25 +1,43 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np - - -class SafeScripted: - - def __init__(self): - pass - - def get_action(self, observation, init_action=None): - pos = observation[0] - if pos > 0: - return np.float32(np.array([-0.999])) - return np.float32(np.array([0.999])) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + +from typing import Optional +import numpy as np + + +class SafeScripted: + """ + SafeScripted class for scripted control. + """ + + def __init__(self) -> None: + pass + + def get_action(self, observation: np.ndarray, init_action: Optional[np.ndarray] = None) -> np.ndarray: + """ + Get the action for scripted control. + + Parameters: + ---------- + observation : np.ndarray + The observation. + init_action : Any, optional + The initial action (default is None). + + Returns: + ---------- + np.ndarray + The scripted action. + """ + pos = observation[0] + if pos > 0: + return np.float32(np.array([-0.999])) + return np.float32(np.array([0.999])) diff --git a/RLLG/envs/confidence.py b/RLLG/envs/confidence.py index 392c14b2..36d297bc 100644 --- a/RLLG/envs/confidence.py +++ b/RLLG/envs/confidence.py @@ -1,42 +1,65 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 
- -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - -from envs.cartpole.confidence import cartpole_lambda_s -from envs.ball_in_cup.confidence import ball_in_cup_lambda_s -from envs.point_mass.confidence import point_mass_lambda_s -from envs.point_circle.confidence import point_circle_lambda_s -from envs.bullet_small_reach.confidence import bullet_small_reach_lambda_s -from envs.hirl_point_fall.confidence import hirl_point_fall_lambda_s - - -dict_norm_to_expert = { - 'cartpole': cartpole_lambda_s, - 'ball_in_cup': ball_in_cup_lambda_s, - 'point_mass': point_mass_lambda_s, - 'point_circle': point_circle_lambda_s, - 'hirl_point_fall': hirl_point_fall_lambda_s, - 'bullet_small_reach': bullet_small_reach_lambda_s, -} - - -def global_lambda_s(glob_name, - experts, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=False - ): - return dict_norm_to_expert[glob_name](experts, - device=device, - pos_tol=pos_tol, - speed_tol=speed_tol, - smoothed=smoothed - ) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Union, Any, List +import torch +from envs.cartpole.confidence import cartpole_lambda_s +from envs.ball_in_cup.confidence import ball_in_cup_lambda_s +from envs.point_mass.confidence import point_mass_lambda_s +from envs.point_circle.confidence import point_circle_lambda_s +from envs.bullet_small_reach.confidence import bullet_small_reach_lambda_s +from envs.hirl_point_fall.confidence import hirl_point_fall_lambda_s + + +dict_norm_to_expert = { + 'cartpole': cartpole_lambda_s, + 'ball_in_cup': ball_in_cup_lambda_s, + 'point_mass': point_mass_lambda_s, + 'point_circle': point_circle_lambda_s, + 'hirl_point_fall': hirl_point_fall_lambda_s, + 'bullet_small_reach': bullet_small_reach_lambda_s, +} + + +def global_lambda_s(glob_name: str, + experts: List[torch.nn.Module], + device: str = "cpu", + pos_tol: Union[float, None] = None, + speed_tol: Union[float, None] = None, + smoothed: bool = False) -> Any: + """ + Returns the confidence lambda_s function based on the specified environment type. + + Parameters: + ---------- + glob_name : str + Name representing the environment type. + experts : List[torch.nn.Module] + List of expert models. + device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool, optional + Whether to use smoothed lambda_s (default is False) + + Returns: + ---------- + Any + The global confidence lambda_s function. 
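# Illustrative call of this dispatcher together with get_env_and_control from
# envs/creation.py (also in this patch). A sketch, not part of the patch; it assumes
# dmc2gym and dm_control are installed so the cartpole environment can be built.
from envs.creation import get_env_and_control
from envs.confidence import global_lambda_s

env, control_dict = get_env_and_control(name='cartpole-swingup', limit_cart=0.6, pos_tol=1.0)
lambda_s = global_lambda_s('cartpole', experts=[], pos_tol=1.0, smoothed=False)
obs = env.reset()
use_local = lambda_s.get_use_local(env, obs)  # 0 or 1 depending on the cart position and pos_tol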
+ """ + return dict_norm_to_expert[glob_name](experts, + device=device, + pos_tol=pos_tol, + speed_tol=speed_tol, + smoothed=smoothed + ) diff --git a/RLLG/envs/creation.py b/RLLG/envs/creation.py index b7876083..cd08aa5d 100644 --- a/RLLG/envs/creation.py +++ b/RLLG/envs/creation.py @@ -1,71 +1,87 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - -from envs.cartpole.create_cartpole import create_cartpole_and_control -from envs.ball_in_cup.create_ball_in_cup import create_ball_in_cup_and_control -from envs.point_mass.create_point_mass import create_point_mass_and_control -from envs.point_circle.create_point_circle import create_point_cirlce_and_control -from envs.bullet_small_reach.create_bullet_small_reach import create_bullet_small_reach_and_control -from envs.hirl_point_fall.create_hirl_point_fall import create_hirl_point_fall_and_control - - -dict_fn = { - 'cartpole': create_cartpole_and_control, - 'ball_in_cup': create_ball_in_cup_and_control, - 'point_mass': create_point_mass_and_control, - 'point_circle': create_point_cirlce_and_control, - 'bullet_small_reach': create_bullet_small_reach_and_control, - 'hirl_point_fall': create_hirl_point_fall_and_control, -} - - -def get_env_and_control(name='ball_in_cup', - orig_cwd='./', - device='cpu', - limit_cart=0.6, - reward_end=1, - pos_tol=1.): - """ - Returns required env and local(s) controller. - The env is a Gym environment. - The local controller is a dictionary with: - - key: point where the linearization happened - - value: control function (taking the state as an argument) - """ - kwargs = {} - - # get glob name - if 'pendulum' in name: - glob_name = 'pendulum' - elif 'cartpole' in name: - glob_name = 'cartpole' - kwargs.update({'limit_cart': limit_cart, 'reward_end': reward_end, 'pos_tol': pos_tol}) - elif 'point_mass' in name: - glob_name = 'point_mass' - elif 'hirl_point_fall' in name: - glob_name = 'hirl_point_fall' - if 'move_block_only' in name: - kwargs = {'move_block_only': True} - else: - glob_name = name - - if "sparse" in name: - kwargs.update({'sparse': True}) - - if "cartpole" in name: - kwargs.update({'task_name': name.split('-')[-1]}) - - # get env and control - env, dict_control = dict_fn[glob_name](orig_cwd=orig_cwd, - device=device, - **kwargs) - - return env, dict_control +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. 
+ + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable +from envs.cartpole.create_cartpole import create_cartpole_and_control +from envs.ball_in_cup.create_ball_in_cup import create_ball_in_cup_and_control +from envs.point_mass.create_point_mass import create_point_mass_and_control +from envs.point_circle.create_point_circle import create_point_cirlce_and_control +from envs.bullet_small_reach.create_bullet_small_reach import create_bullet_small_reach_and_control +from envs.hirl_point_fall.create_hirl_point_fall import create_hirl_point_fall_and_control + + +dict_fn = { + 'cartpole': create_cartpole_and_control, + 'ball_in_cup': create_ball_in_cup_and_control, + 'point_mass': create_point_mass_and_control, + 'point_circle': create_point_cirlce_and_control, + 'bullet_small_reach': create_bullet_small_reach_and_control, + 'hirl_point_fall': create_hirl_point_fall_and_control, +} + + +def get_env_and_control(name: str = 'ball_in_cup', + orig_cwd: str = './', + device: str = 'cpu', + limit_cart: Optional[float] = 0.6, + reward_end: Optional[int] = 1, + pos_tol: Optional[float] = 1.) -> Tuple[Any, Dict[Union[str, Tuple[float, float]], Callable]]: + """ + Returns the environment and local controller. + + Parameters: + ---------- + name : str, optional + Name of the environment (default is 'ball_in_cup') + orig_cwd : str, optional + Original working directory (default is './') + device : str, optional + Device for computation (default is 'cpu') + limit_cart : float, optional + Limit for the cart (default is 0.6) + reward_end : int, optional + Reward at the end (default is 1) + pos_tol : float, optional + Position tolerance (default is 1.0) + + Returns: + ---------- + Tuple[Any, Dict[Union[str, Tuple[float, float]], Callable]] + The environment and local controller. + """ + kwargs = {} + + # get glob name + if 'pendulum' in name: + glob_name = 'pendulum' + elif 'cartpole' in name: + glob_name = 'cartpole' + kwargs.update({'limit_cart': limit_cart, 'reward_end': reward_end, 'pos_tol': pos_tol}) + elif 'point_mass' in name: + glob_name = 'point_mass' + elif 'hirl_point_fall' in name: + glob_name = 'hirl_point_fall' + if 'move_block_only' in name: + kwargs = {'move_block_only': True} + else: + glob_name = name + + if "sparse" in name: + kwargs.update({'sparse': True}) + + if "cartpole" in name: + kwargs.update({'task_name': name.split('-')[-1]}) + + # get env and control + env, dict_control = dict_fn[glob_name](orig_cwd=orig_cwd, + device=device, + **kwargs) + + return env, dict_control diff --git a/RLLG/envs/env_utils.py b/RLLG/envs/env_utils.py index 47ebb58a..93444a26 100644 --- a/RLLG/envs/env_utils.py +++ b/RLLG/envs/env_utils.py @@ -1,56 +1,115 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2016 OpenAI (https://openai.com). - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - -# This is taken from the gym repository - -import gym - - -# https://github.com/openai/gym/blob/master/gym/core.py -class NormalizedEnv(gym.ActionWrapper): - """ Normalize action space """ - - def __init__(self, env): - super(NormalizedEnv, self).__init__(env) - - def action(self, action): - act_k = (self.action_space.high - self.action_space.low) / 2. - act_b = (self.action_space.high + self.action_space.low) / 2. - return act_k * action + act_b - - def reverse_action(self, action): - act_k_inv = 2. 
/ (self.action_space.high - self.action_space.low) - act_b = (self.action_space.high + self.action_space.low) / 2. - return act_k_inv * (action - act_b) - - -class ForcedTimeLimit(gym.Wrapper): - - def __init__(self, env, max_episode_steps=None): - super().__init__(env) - if max_episode_steps is None and self.env.spec is not None: - max_episode_steps = env.spec.max_episode_steps - if self.env.spec is not None: - self.env.spec.max_episode_steps = max_episode_steps - self._max_episode_steps = max_episode_steps - self._elapsed_steps = None - - def step(self, action): - observation, reward, done, info = self.env.step(action) - done = False - self._elapsed_steps += 1 - if self._elapsed_steps >= self._max_episode_steps: - info["TimeLimit.truncated"] = not done - done = True - return observation, reward, done, info - - def reset(self, **kwargs): - self._elapsed_steps = 0 - return self.env.reset(**kwargs) +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2016 OpenAI (https://openai.com). + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# This is taken from the gym repository + +from typing import Any, Dict, List, Tuple +import gym +import numpy as np + + +# https://github.com/openai/gym/blob/master/gym/core.py +class NormalizedEnv(gym.ActionWrapper): + """ Normalize action space """ + + def __init__(self, env: gym.Env) -> None: + super(NormalizedEnv, self).__init__(env) + + def action(self, action: np.ndarray) -> np.ndarray: + """ + Normalize the action. + + Parameters: + ---------- + action : np.ndarray + The original action + + Returns: + ---------- + np.ndarray + The normalized action. + """ + act_k = (self.action_space.high - self.action_space.low) / 2. + act_b = (self.action_space.high + self.action_space.low) / 2. + return act_k * action + act_b + + def reverse_action(self, action: np.ndarray) -> np.ndarray: + """ + Reverse the normalized action. + + Parameters: + ---------- + action : np.ndarray + The normalized action. + + Returns: + ---------- + np.ndarray + The original action. + """ + act_k_inv = 2. / (self.action_space.high - self.action_space.low) + act_b = (self.action_space.high + self.action_space.low) / 2. + return act_k_inv * (action - act_b) + + +class ForcedTimeLimit(gym.Wrapper): + """ + A wrapper for enforcing a maximum number of steps in an episode. + + Parameters: + ---------- + env : gym.Env + The underlying environment. + max_episode_steps : Optional[int] + The maximum number of steps in an episode. + """ + + def __init__(self, env: gym.Env, max_episode_steps: int = None) -> None: + super().__init__(env) + if max_episode_steps is None and self.env.spec is not None: + max_episode_steps = env.spec.max_episode_steps + if self.env.spec is not None: + self.env.spec.max_episode_steps = max_episode_steps + self._max_episode_steps = max_episode_steps + self._elapsed_steps = None + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]: + """ + Run one timestep of the environment's dynamics, and only return done=True if max_time_steps has been reached. + + Parameters: + ---------- + action : np.ndarray + The action to be executed + + Returns: + ---------- + tuple + Observation, reward, done, and info. 
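# Sketch of how these wrappers compose (similar to create_hirl_point_fall.py in this
# patch). 'Pendulum-v1' is only a placeholder id; any environment using the older
# 4-tuple gym step API works here.
import gym
import numpy as np
from envs.env_utils import NormalizedEnv, ForcedTimeLimit

env = ForcedTimeLimit(NormalizedEnv(gym.make('Pendulum-v1')), max_episode_steps=1000)
obs = env.reset()
action = np.zeros(env.action_space.shape, dtype=np.float32)  # interpreted as a normalized action in [-1, 1]
obs, reward, done, info = env.step(action)                    # done is forced to False until step 1000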
+ """ + observation, reward, done, info = self.env.step(action) + done = False + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + info["TimeLimit.truncated"] = not done + done = True + return observation, reward, done, info + + def reset(self, **kwargs): + """ + Reset the environment. + + Returns: + ---------- + np.ndarray + The initial observation. + """ + self._elapsed_steps = 0 + return self.env.reset(**kwargs) diff --git a/RLLG/envs/hirl_point_fall/confidence.py b/RLLG/envs/hirl_point_fall/confidence.py index 13bfacd9..46766ff1 100644 --- a/RLLG/envs/hirl_point_fall/confidence.py +++ b/RLLG/envs/hirl_point_fall/confidence.py @@ -1,30 +1,74 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -class LambdaS: - - def __init__(self, pos_tol=1.): - self.pos_tol = pos_tol - - def get_use_local(self, env, observation): - if int(observation[4]) == 1: - return 0 - return 1 - - -def hirl_point_fall_lambda_s(expert, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=False - ): - return LambdaS() +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable + + +class LambdaS: + """ + Class representing the confidence function. + + Parameters: + ---------- + pos_tol : float or None, optional + Position tolerance (default is 1.) + """ + + def __init__(self, pos_tol: float = 1.): + self.pos_tol = pos_tol + + def get_use_local(self, env: Any, observation: List) -> float: + """ + Get the lambda s value based on the environment and observation. + + Parameters: + ---------- + env : Any + The environment + observation : list of array + The observation. + + Returns: + ---------- + float + Use_local value (0 or 1). + """ + if int(observation[4]) == 1: + return 0 + return 1 + + +def hirl_point_fall_lambda_s(expert: Any, + device: str = "cpu", + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = None) -> LambdaS: + """ + Returns the confidence LambdaS instance for the point fall environment. + + Parameters: + ---------- + expert : Any + Expert (not used, but here in case the lambda_s depends on the expert). 
+ device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool or None, optional + Whether to use smoothed lambda_s (default is None) + + Returns: + ---------- + LambdaS + The LambdaS instance + """ + return LambdaS() diff --git a/RLLG/envs/hirl_point_fall/create_hirl_point_fall.py b/RLLG/envs/hirl_point_fall/create_hirl_point_fall.py index 0ea8bd39..1af48274 100644 --- a/RLLG/envs/hirl_point_fall/create_hirl_point_fall.py +++ b/RLLG/envs/hirl_point_fall/create_hirl_point_fall.py @@ -1,37 +1,55 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -from envs.hirl_point_fall.point_fall import PointFallEnv -from envs.hirl_point_fall.local_expert_policy import SACExpert -from envs.hirl_point_fall.wrapper import ForcedTimeLimit -from envs.env_utils import NormalizedEnv -import os - - -def create_hirl_point_fall_and_control(move_block_only=False, - orig_cwd='./', - device="cpu"): - - init_env = PointFallEnv(move_block_only=move_block_only, scaling_factor=4, max_steps=1000) - env = ForcedTimeLimit(NormalizedEnv(init_env), max_episode_steps=1000) - - path = os.path.join(orig_cwd, 'envs', 'hirl_point_fall', "models") - - # create controller - control_dict = { - "MediumSAC": { - "coord": None, - "local_expert": SACExpert(env, path, device) - }, - } - - return env, control_dict +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + + +from typing import Any, Tuple, Dict +from envs.hirl_point_fall.point_fall import PointFallEnv +from envs.hirl_point_fall.local_expert_policy import SACExpert +from envs.hirl_point_fall.wrapper import ForcedTimeLimit +from envs.env_utils import NormalizedEnv +import os + + +def create_hirl_point_fall_and_control(move_block_only: bool = False, + orig_cwd: str = './', + device: str = "cpu") -> Tuple[Any, Dict[str, Any]]: + """ + Create the Point Fall environment and its associated controller. + + Parameters: + ---------- + move_block_only : bool, optional + If True, move only the block; if False, move both the block and the robot (default is False) + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device to run the environment on (default is "cpu") + + Returns: + ---------- + Tuple[Any, Dict[str, Any]] + Tuple containing the environment and the controller dictionary.
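+ + Example (an illustrative sketch; assumes the pretrained 'medium_sac' checkpoint is available under envs/hirl_point_fall/models): + >>> env, control_dict = create_hirl_point_fall_and_control(orig_cwd='./', device='cpu') + >>> local_expert = control_dict['MediumSAC']['local_expert'] + >>> action = local_expert.get_action(env.reset())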
+ """ + + init_env = PointFallEnv(move_block_only=move_block_only, scaling_factor=4, max_steps=1000) + env = ForcedTimeLimit(NormalizedEnv(init_env), max_episode_steps=1000) + + path = os.path.join(orig_cwd, 'envs', 'hirl_point_fall', "models") + + # create controller + control_dict = { + "MediumSAC": { + "coord": None, + "local_expert": SACExpert(env, path, device) + }, + } + + return env, control_dict diff --git a/RLLG/envs/hirl_point_fall/local_expert_policy.py b/RLLG/envs/hirl_point_fall/local_expert_policy.py index 94cd59a3..a834b3ab 100644 --- a/RLLG/envs/hirl_point_fall/local_expert_policy.py +++ b/RLLG/envs/hirl_point_fall/local_expert_policy.py @@ -1,47 +1,75 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np -import torch -import os - -class SACExpert: - - def __init__(self, env, path, device="cpu"): - - from agents.common.model import TanhGaussianPolicy, SamplerPolicy - # hyper-params - policy_arch = '32-32' - policy_log_std_multiplier = 1.0 - policy_log_std_offset = -1.0 - - # load expert policy - expert_policy = TanhGaussianPolicy( - env.observation_space.shape[0], - env.action_space.shape[0], - policy_arch, - log_std_multiplier=policy_log_std_multiplier, - log_std_offset=policy_log_std_offset, - ) - glob_path = os.path.join(path, 'medium_sac') - expert_policy.load_state_dict(torch.load(glob_path)) - expert_policy.to(device) - self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) - - def get_action(self, observation, init_action=None, env=None): - with torch.no_grad(): - expert_action = self.sampling_expert_policy( - np.expand_dims(observation, 0), deterministic=True - )[0, :] - # to further decrease performance - expert_action[0] *= 0.2 - return np.clip(expert_action, a_min=-0.99, a_max=0.99) # expert_action +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + +from typing import Any, Optional +import numpy as np +import torch +import os + + +class SACExpert: + """ + Soft Actor-Critic (SAC) Expert. + + Parameters: + ---------- + env : Any + The environment (usually dm control env, could be gym as well or others). + path : str + The path to the model. + device : str, optional + The device to run the expert policy (default is 'cpu'). 
+ """ + + def __init__(self, env: Any, path: str, device: Optional[str] = "cpu") -> None: + + from agents.common.model import TanhGaussianPolicy, SamplerPolicy + # hyper-params + policy_arch = '32-32' + policy_log_std_multiplier = 1.0 + policy_log_std_offset = -1.0 + + # load expert policy + expert_policy = TanhGaussianPolicy( + env.observation_space.shape[0], + env.action_space.shape[0], + policy_arch, + log_std_multiplier=policy_log_std_multiplier, + log_std_offset=policy_log_std_offset, + ) + glob_path = os.path.join(path, 'medium_sac') + expert_policy.load_state_dict(torch.load(glob_path)) + expert_policy.to(device) + self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) + + def get_action(self, observation: np.ndarray, init_action: Any = None, env: Any = None) -> np.ndarray: + """ + Get an action from the SAC expert policy. + + Parameters: + ---------- + observation : numpy.ndarray + The observation from the environment. + init_action : Any, optional + Initial action (default is None). + env : gym.Env, optional + The environment (default is None). + + Returns: + ---------- + numpy.ndarray + The clipped expert action. + """ + with torch.no_grad(): + expert_action = self.sampling_expert_policy( + np.expand_dims(observation, 0), deterministic=True + )[0, :] + # to further decrease performance + expert_action[0] *= 0.2 + return np.clip(expert_action, a_min=-0.99, a_max=0.99) # expert_action diff --git a/RLLG/envs/hirl_point_fall/wrapper.py b/RLLG/envs/hirl_point_fall/wrapper.py index b89f807c..8a5a62e1 100644 --- a/RLLG/envs/hirl_point_fall/wrapper.py +++ b/RLLG/envs/hirl_point_fall/wrapper.py @@ -1,37 +1,72 @@ -# 2023.02.14-Changed for RLLG -# Huawei Technologies Co., Ltd. - -# Copyright (c) 2016 OpenAI (https://openai.com). - -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - - -import gym - - -class ForcedTimeLimit(gym.Wrapper): - - def __init__(self, env, max_episode_steps=None): - super().__init__(env) - if max_episode_steps is None and self.env.spec is not None: - max_episode_steps = env.spec.max_episode_steps - if self.env.spec is not None: - self.env.spec.max_episode_steps = max_episode_steps - self._max_episode_steps = max_episode_steps - self._elapsed_steps = None - - def step(self, action): - observation, reward, done, info = self.env.step(action) - done = False - self._elapsed_steps += 1 - if self._elapsed_steps >= self._max_episode_steps: - info["TimeLimit.truncated"] = not done - done = True - return observation, reward, done, info - - def reset(self, **kwargs): - self._elapsed_steps = 0 - return self.env.reset(**kwargs) +# 2023.02.14-Changed for RLLG +# Huawei Technologies Co., Ltd. + +# Copyright (c) 2016 OpenAI (https://openai.com). + +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +import gym +import numpy as np +from typing import Any, Optional, Dict, Tuple + + +class ForcedTimeLimit(gym.Wrapper): + """ + A wrapper for enforcing a maximum number of steps in an episode. + + Parameters: + ---------- + env : gym.Env + The underlying environment. + max_episode_steps : Optional[int] + The maximum number of steps in an episode. + elapsed_steps : Optional[int] + The number of steps taken in the current episode. 
+ """ + + def __init__(self, env: gym.Env, max_episode_steps: Optional[int] = None) -> None: + super().__init__(env) + if max_episode_steps is None and self.env.spec is not None: + max_episode_steps = env.spec.max_episode_steps + if self.env.spec is not None: + self.env.spec.max_episode_steps = max_episode_steps + self._max_episode_steps = max_episode_steps + self._elapsed_steps = None + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]: + """ + Take a step in the environment. + + Parameters: + ---------- + action : Any + The action to be taken. + + Returns: + ---------- + Tuple + The observation, reward, done, and info. + """ + observation, reward, done, info = self.env.step(action) + done = False + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + info["TimeLimit.truncated"] = not done + done = True + return observation, reward, done, info + + def reset(self, **kwargs: Any) -> Any: + """ + Reset the environment. + + Returns: + ---------- + Any + The initial observation. + """ + self._elapsed_steps = 0 + return self.env.reset(**kwargs) diff --git a/RLLG/envs/point_circle/confidence.py b/RLLG/envs/point_circle/confidence.py index 82a77c39..49475314 100644 --- a/RLLG/envs/point_circle/confidence.py +++ b/RLLG/envs/point_circle/confidence.py @@ -1,30 +1,75 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - -class LambdaS: - - def __init__(self, pos_tol=0.6): - self.pos_tol = pos_tol - - def get_use_local(self, env, observation): - x_pos, y_pos, z_pos = env.env.world.robot_pos() - if abs(x_pos) > self.pos_tol: - return 1 - return 0 - - -def point_circle_lambda_s(expert, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=False - ): - return LambdaS(pos_tol=pos_tol) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable + + +class LambdaS: + """ + Class representing the confidence function. + + Parameters: + ---------- + pos_tol : float or None, optional + Position tolerance (default is 0.6) + """ + + def __init__(self, pos_tol: float =0.6): + self.pos_tol = pos_tol + + def get_use_local(self, env: Any, observation: List) -> float: + """ + Get the lambda s value based on the environment and observation. + + Parameters: + ---------- + env : Any + The environment + observation : list of array + The observation. + + Returns: + ---------- + float + Use_local value (0 or 1). + """ + x_pos, y_pos, z_pos = env.env.world.robot_pos() + if abs(x_pos) > self.pos_tol: + return 1 + return 0 + + +def point_circle_lambda_s(expert: Any, + device: str = "cpu", + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = None) -> LambdaS: + """ + Returns the confidence LambdaS instance for the point circle environment. 
+ + Parameters: + ---------- + expert : Any + Expert (not used, but here in case the lambda_s depends on the expert). + device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool or None, optional + Whether to use smoothed lambda_s (default is None) + + Returns: + ---------- + LambdaS + The LambdaS instance + """ + return LambdaS(pos_tol=pos_tol) diff --git a/RLLG/envs/point_circle/create_point_circle.py b/RLLG/envs/point_circle/create_point_circle.py index b909c6d9..d4091062 100644 --- a/RLLG/envs/point_circle/create_point_circle.py +++ b/RLLG/envs/point_circle/create_point_circle.py @@ -1,44 +1,59 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -try: - from safety_gym.envs.engine import Engine -except ModuleNotFoundError: - pass -from envs.point_circle.point_circle import PointCircle -from envs.point_circle.local_expert_policy import SafeScripted -import os -from types import MethodType - - -def create_point_cirlce_and_control(orig_cwd='./', - device="cpu"): - config_dict = { - 'robot_base': 'xmls/point.xml', - 'task': 'circle', - 'observe_goal_lidar': False, - 'observe_box_lidar': False, - 'observe_circle': True, - 'lidar_max_dist': 6 - } - init_env = Engine(config=config_dict) - env = PointCircle(init_env) - - # create controller - control_dict = { - "SafeScripted": { - "coord": None, - "local_expert": SafeScripted() - }, - } - - return env, control_dict +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + + +try: + from safety_gym.envs.engine import Engine +except ModuleNotFoundError: + pass +from typing import Any, Tuple, Dict +from envs.point_circle.point_circle import PointCircle +from envs.point_circle.local_expert_policy import SafeScripted +import os + + +def create_point_cirlce_and_control(orig_cwd: str ='./', + device: str ="cpu") -> Tuple[Any, Dict]: + """ + Create the Point Circle environment and its associated controller. + + Parameters: + ---------- + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device to run the environment on (default is "cpu") + + Returns: + ---------- + Tuple[Any, Dict[str, Any]] + Tuple containing the environment and the controller dictionary. 
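+ + Example (an illustrative sketch; assumes safety_gym and its MuJoCo dependencies are installed): + >>> env, control_dict = create_point_cirlce_and_control(orig_cwd='./') + >>> scripted_expert = control_dict['SafeScripted']['local_expert'] + >>> action = scripted_expert.get_action(env.reset(), env=env)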
+ """ + config_dict = { + 'robot_base': 'xmls/point.xml', + 'task': 'circle', + 'observe_goal_lidar': False, + 'observe_box_lidar': False, + 'observe_circle': True, + 'lidar_max_dist': 6 + } + init_env = Engine(config=config_dict) + env = PointCircle(init_env) + + # create controller + control_dict = { + "SafeScripted": { + "coord": None, + "local_expert": SafeScripted() + }, + } + + return env, control_dict diff --git a/RLLG/envs/point_circle/local_expert_policy.py b/RLLG/envs/point_circle/local_expert_policy.py index 0e04c3a8..bb302e81 100644 --- a/RLLG/envs/point_circle/local_expert_policy.py +++ b/RLLG/envs/point_circle/local_expert_policy.py @@ -1,50 +1,71 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np -import torch -import os - - -class SafeScripted: - - def __init__(self): - pass - - def get_action(self, observation, init_action=None, env=None): - x_pos, y_pos, z_pos = env.env.world.robot_pos() - rot_mat = env.env.world.robot_mat() - theta = np.arctan2(-rot_mat[0, 1], rot_mat[0, 0]) - if x_pos > 0: - if abs(theta) >= 3 * np.pi / 4: - if y_pos > 0 and theta > 0: - return np.array([0.999, 0.5]) - elif y_pos < 0 and theta < 0: - return np.array([0.999, -0.5]) - else: - return np.array([0.999, 0]) - elif theta < 0: - return np.array([-0.999, -0.999]) - else: - return np.array([-0.999, 0.999]) - else: - if abs(theta) <= np.pi / 4: - if y_pos > 0 and theta > 0: - return np.array([0.999, -0.5]) - elif y_pos < 0 and theta < 0: - return np.array([0.999, 0.5]) - else: - return np.array([0.999, 0]) - elif theta < 0: - return np.array([-0.999, 0.999]) - else: - return np.array([-0.999, -0.999]) +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + +from typing import Any, Optional +import numpy as np +import torch +import os + + +class SafeScripted: + """ + SafeScripted class for scripted control. + """ + + def __init__(self) -> None: + pass + + def get_action(self, observation: np.ndarray, init_action: Optional[Any] = None, env: Optional[Any] = None)\ + -> np.ndarray: + """ + Get the action for scripted control. + + Parameters: + ---------- + observation : Any + The observation. + init_action : Any, optional + The initial action (default is None). + env : Any, optional + The environment object (default is None). + + Returns: + ---------- + np.ndarray + The scripted action. 
+ """ + x_pos, y_pos, z_pos = env.env.world.robot_pos() + rot_mat = env.env.world.robot_mat() + theta = np.arctan2(-rot_mat[0, 1], rot_mat[0, 0]) + if x_pos > 0: + if abs(theta) >= 3 * np.pi / 4: + if y_pos > 0 and theta > 0: + return np.array([0.999, 0.5]) + elif y_pos < 0 and theta < 0: + return np.array([0.999, -0.5]) + else: + return np.array([0.999, 0]) + elif theta < 0: + return np.array([-0.999, -0.999]) + else: + return np.array([-0.999, 0.999]) + else: + if abs(theta) <= np.pi / 4: + if y_pos > 0 and theta > 0: + return np.array([0.999, -0.5]) + elif y_pos < 0 and theta < 0: + return np.array([0.999, 0.5]) + else: + return np.array([0.999, 0]) + elif theta < 0: + return np.array([-0.999, 0.999]) + else: + return np.array([-0.999, -0.999]) diff --git a/RLLG/envs/point_circle/point_circle.py b/RLLG/envs/point_circle/point_circle.py index aef99a6d..90bbd8a5 100644 --- a/RLLG/envs/point_circle/point_circle.py +++ b/RLLG/envs/point_circle/point_circle.py @@ -1,31 +1,75 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -class PointCircle: - - def __init__(self, env): - self.env = env - self.observation_space = self.env.observation_space - self.action_space = self.env.action_space - - def step(self, action): - obs, reward, done, info = self.env.step(action) - if info['cost'] >= 0.5: - reward = -1000 - done = True - return obs, reward, done, info - - def render(self, mode="human"): - return self.env.render(mode) - - def reset(self): - return self.env.reset() +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Any, Tuple, Dict, Optional +import numpy as np + + +class PointCircle: + """ + Wrapper for the safe PointCircke environment to change the constraint function into a bad reward. + + Parameters: + ---------- + env : Any + The environment to wrap. + """ + + def __init__(self, env: Any) -> None: + self.env = env + self.observation_space = self.env.observation_space + self.action_space = self.env.action_space + + def step(self, action: np.ndarray) -> Tuple[Any, float, bool, Dict]: + """ + Step through the environment dynamics and change reward function. + + Parameters: + ---------- + action : Any + The action to be executed. + + Returns: + ---------- + tuple + Observation, reward, done, and info. + """ + obs, reward, done, info = self.env.step(action) + if info['cost'] >= 0.5: + reward = -1000 + done = True + return obs, reward, done, info + + def render(self, mode: Optional[str] = "human") -> Any: + """ + Render the environment. + + Parameters: + ---------- + mode : str, optional + Rendering mode (default is "human"). + + Returns: + ---------- + Any + The rendering output. + """ + return self.env.render(mode) + + def reset(self) -> np.ndarray: + """ + Reset the environment. + + Returns: + ---------- + Any + The reset observation. 
+ """ + return self.env.reset() diff --git a/RLLG/envs/point_mass/confidence.py b/RLLG/envs/point_mass/confidence.py index 72023509..16499be9 100644 --- a/RLLG/envs/point_mass/confidence.py +++ b/RLLG/envs/point_mass/confidence.py @@ -1,37 +1,83 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - -import numpy as np -from dm_control.utils import rewards - - -class LambdaS: - - def __init__(self, pos_tol=None, speed_tol=None): - self.pos_tol = pos_tol - self.speed_tol = speed_tol - - - def get_use_local(self, env, observation): - # check if inside big target or not - target_size = 0.1 # env.env.physics.named.model.geom_size['target', 0] - inside_big_goal = rewards.tolerance(env.env.physics.mass_to_target_dist(), - bounds=(0, target_size)) - if inside_big_goal: - return 0 - return 1 - -def point_mass_lambda_s(expert, - device="cpu", - pos_tol=None, - speed_tol=None, - smoothed=None): - return LambdaS() +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import Union, Any, Dict, List, Optional, Tuple, Callable +import numpy as np +from dm_control.utils import rewards + + +class LambdaS: + """ + Class representing the confidence function. + + Parameters: + ---------- + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + """ + + def __init__(self, pos_tol: float = None, speed_tol: float = None): + self.pos_tol = pos_tol + self.speed_tol = speed_tol + + + def get_use_local(self, env: Any, observation: List) -> float: + """ + Get the lambda s value based on the environment and observation. + + Parameters: + ---------- + env : Any + The environment + observation : list of array + The observation. + + Returns: + ---------- + float + Use_local value (0 or 1). + """ + # check if inside big target or not + target_size = 0.1 # env.env.physics.named.model.geom_size['target', 0] + inside_big_goal = rewards.tolerance(env.env.physics.mass_to_target_dist(), + bounds=(0, target_size)) + if inside_big_goal: + return 0 + return 1 + +def point_mass_lambda_s(expert: Any, + device: str = "cpu", + pos_tol: float = None, + speed_tol: float = None, + smoothed: bool = None) -> LambdaS: + """ + Returns the confidence LambdaS instance for the point mass environment. + + Parameters: + ---------- + expert : Any + Expert (not used, but here in case the lambda_s depends on the expert). 
+ device : str, optional + Device for computation (default is 'cpu') + pos_tol : float or None, optional + Position tolerance (default is None) + speed_tol : float or None, optional + Speed tolerance (default is None) + smoothed : bool or None, optional + Whether to use smoothed lambda_s (default is None) + + Returns: + ---------- + LambdaS + The LambdaS instance + """ + return LambdaS() diff --git a/RLLG/envs/point_mass/create_point_mass.py b/RLLG/envs/point_mass/create_point_mass.py index 8a98c1c1..67cfb302 100644 --- a/RLLG/envs/point_mass/create_point_mass.py +++ b/RLLG/envs/point_mass/create_point_mass.py @@ -1,55 +1,86 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import dmc2gym -from envs.point_mass.local_expert_policy import SACExpert -import os -from types import MethodType -from dm_control.utils import rewards - - -# modify initialization -def new_get_reward(self, physics): - """Returns a reward to the agent.""" - target_size = physics.named.model.geom_size['target', 0] - near_target = rewards.tolerance(physics.mass_to_target_dist(), - bounds=(0, target_size)) - control_reward = rewards.tolerance(physics.control(), margin=1, - value_at_margin=0, - sigmoid='quadratic').mean() - small_control = (control_reward + 4) / 5 - return near_target * small_control - - -def create_point_mass_and_control(orig_cwd='./', - device="cpu", - sparse=False): - # create env - env = dmc2gym.make('point_mass', 'easy') - - # modify target (to create simple task) - if sparse: - env.env._env._task.get_reward = MethodType(new_get_reward, env.env._env._task) - - # env.env._env.physics.named.model.geom_size['target', 0] = 0.1 - # env.env._env._task._target_size = 0.1 - - path = os.path.join(orig_cwd, 'envs', 'point_mass', 'models') - - control_dict = { - "MediumSAC": { - "coord": None, - "local_expert": SACExpert(env, path, device) - }, - } - +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. + +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + +from typing import Any, Tuple, Dict +import dmc2gym +from envs.point_mass.local_expert_policy import SACExpert +import os +from types import MethodType +from dm_control.utils import rewards + + +# modify initialization +def new_get_reward(self: Any, physics: Any) -> float: + """ + Returns a reward to the agent. + + Parameters: + ---------- + self : Any + Instance of the environment task + physics : Any + Physics object representing the state of the environment + + Returns: + ---------- + float + Computed reward for the agent. 
+ """ + target_size = physics.named.model.geom_size['target', 0] + near_target = rewards.tolerance(physics.mass_to_target_dist(), + bounds=(0, target_size)) + control_reward = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic').mean() + small_control = (control_reward + 4) / 5 + return near_target * small_control + + +def create_point_mass_and_control(orig_cwd: str = './', + device: str = "cpu", + sparse: bool = False) -> Tuple[Any, Dict[str, Any]]: + """ + Create the Point Mass environment and its associated controller. + + Parameters: + ---------- + orig_cwd : str, optional + Original current working directory (default is './') + device : str, optional + Device to run the environment on (default is "cpu") + sparse : bool, optional + Flag indicating whether to use sparse rewards (default is False) + + Returns: + ---------- + Tuple[Any, Dict[str, Any]] + Tuple containing the environment and the controller dictionary + """ + # create env + env = dmc2gym.make('point_mass', 'easy') + + # modify target (to create simple task) + if sparse: + env.env._env._task.get_reward = MethodType(new_get_reward, env.env._env._task) + + # env.env._env.physics.named.model.geom_size['target', 0] = 0.1 + # env.env._env._task._target_size = 0.1 + + path = os.path.join(orig_cwd, 'envs', 'point_mass', 'models') + + control_dict = { + "MediumSAC": { + "coord": None, + "local_expert": SACExpert(env, path, device) + }, + } + return env, control_dict \ No newline at end of file diff --git a/RLLG/envs/point_mass/local_expert_policy.py b/RLLG/envs/point_mass/local_expert_policy.py index 9aa58a0f..71eb0dd1 100644 --- a/RLLG/envs/point_mass/local_expert_policy.py +++ b/RLLG/envs/point_mass/local_expert_policy.py @@ -1,46 +1,75 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. - - - - -import numpy as np -import torch -import os - -class SACExpert: - - def __init__(self, env, path, device="cpu"): - - from agents.common.model import TanhGaussianPolicy, SamplerPolicy - # hyper-params - policy_arch = '64-64' - policy_log_std_multiplier = 1.0 - policy_log_std_offset = -1.0 - - # load expert policy - expert_policy = TanhGaussianPolicy( - env.observation_space.shape[0], - env.action_space.shape[0], - policy_arch, - log_std_multiplier=policy_log_std_multiplier, - log_std_offset=policy_log_std_offset, - ) - glob_path = os.path.join(path, 'medium_expert_sac') - - expert_policy.load_state_dict(torch.load(glob_path)) - expert_policy.to(device) - self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) - - def get_action(self, observation, init_action=None, env=None): - with torch.no_grad(): - expert_action = self.sampling_expert_policy( - np.expand_dims(observation, 0), deterministic=True - )[0, :] - return np.clip(expert_action, a_min=-0.99, a_max=0.99) # expert_action +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. 
+ +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + + +from typing import Any, Optional +import numpy as np +import torch +import os + + +class SACExpert: + """ + Soft Actor-Critic (SAC) Expert. + + Parameters: + ---------- + env : Any + The environment (usually dm control env, could be gym as well or others). + path : str + The path to the model. + device : str, optional + The device to run the expert policy (default is 'cpu'). + """ + + def __init__(self, env: Any, path: str, device: Optional[str] = "cpu") -> None: + from agents.common.model import TanhGaussianPolicy, SamplerPolicy + # hyper-params + policy_arch = '64-64' + policy_log_std_multiplier = 1.0 + policy_log_std_offset = -1.0 + + # load expert policy + expert_policy = TanhGaussianPolicy( + env.observation_space.shape[0], + env.action_space.shape[0], + policy_arch, + log_std_multiplier=policy_log_std_multiplier, + log_std_offset=policy_log_std_offset, + ) + glob_path = os.path.join(path, 'medium_expert_sac') + + expert_policy.load_state_dict(torch.load(glob_path)) + expert_policy.to(device) + self.sampling_expert_policy = SamplerPolicy(expert_policy, device=device) + + def get_action(self, observation: np.ndarray, init_action: Any = None, env: Any = None) -> np.ndarray: + """ + Get an action from the SAC expert policy. + + Parameters: + ---------- + observation : numpy.ndarray + The observation from the environment. + init_action : Any, optional + Initial action (default is None). + env : gym.Env, optional + The environment (default is None). + + Returns: + ---------- + numpy.ndarray + The clipped expert action. + """ + with torch.no_grad(): + expert_action = self.sampling_expert_policy( + np.expand_dims(observation, 0), deterministic=True + )[0, :] + return np.clip(expert_action, a_min=-0.99, a_max=0.99) # expert_action diff --git a/RLLG/main.py b/RLLG/main.py index eb2c50fa..da6c283e 100644 --- a/RLLG/main.py +++ b/RLLG/main.py @@ -1,129 +1,131 @@ -# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. - -# This program is free software; you can redistribute it and/or modify it under -# the terms of the MIT license. - -# This program is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. See the MIT License for more details. 
- - -import os -import numpy as np -import yaml -from ray import tune -from ray.tune import run - -from agents.common.utils import get_global_name, get_global_agent_name -from sac_main_fn import main as sac_main - -os.environ["Timer"] = '1' - - -def trial_name(trial, hp_to_write): - ti = 'repeat_run' - identifier = ','.join([f'{hp}={trial.config[hp]}' for hp in hp_to_write]) + \ - f',trial={trial.config[ti]},id={trial.trial_id}' - return identifier - - -if __name__ == '__main__': - - envs = ['ball_in_cup'] - agents = [ - 'SAC', - # 'SAG', - # 'PIG', - # 'PAG', - ] - nb_local_experts = 'simple' - - for env in envs: - - glob_name = get_global_name(env) - - with open(os.path.join(os.getcwd(), 'ray_config', f'{glob_name}_cfg.yaml')) as f: - config = yaml.safe_load(f) - np.random.seed(config['seed']) - del config['seed'] - - config['orig_cwd'] = os.getcwd() - config['env'] = env - config['glob_name'] = glob_name - config['device'] = 'cpu' - - # get some hyperparms and remove them from dict - expert_names = config['local_experts'] - del config['local_experts'] - dict_pos_tol = None - if 'pos_tol' in config: - dict_pos_tol = config['pos_tol'] - del config['pos_tol'] - dict_beta = config['beta'] - dict_delta = config['delta'] - dict_phi = config['phi'] - del config['beta'] - del config['delta'] - del config['phi'] - decay_parameter_list = config['decay_parameter'] - del config['decay_parameter'] - - for expert in expert_names: - - config['expert'] = expert - - for agent_name in agents: - - # agent name - glob_agent_name = get_global_agent_name(agent_name) - config['agent_name'] = agent_name - - # get hyperparameters - if dict_pos_tol is not None: - config['pos_tol'] = dict_pos_tol[agent_name] - config['beta'] = dict_beta[agent_name] - config['delta'] = dict_delta[agent_name] - config['phi'] = dict_phi[agent_name] - - # decay or not - agent_name_to_show = agent_name - if agent_name in ['SAC', 'SAG', 'NaiveSAG']: - decay_parameter_list = [ False ] - - for decay_parameter in decay_parameter_list: - - config['decay_parameter'] = decay_parameter - if decay_parameter: - agent_name_to_show = 'Decreased' + agent_name_to_show - else: - # to avoid unecessary runs - config['delta'] = [ 1 ] - - # ray preparation - hps = [k for k, v in config.items() if type(v) is list] - config_ray = config.copy() - config_ray = {k: tune.grid_search(v) if type(v) is list else v for k, v in config.items()} - config_ray['repeat_run'] = tune.grid_search(list(range(config['repeat_run']))) - metric_columns = ['epoch', 'average_return', 'mean_avg_return', 'epoch_time'] - reporter = tune.CLIReporter(parameter_columns=hps, metric_columns=metric_columns) - - env_name_folder = env - - if agent_name in ['SAC']: - save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}' - else: - save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}/{expert}' - - analysis = run( - sac_main, - config=config_ray, - metric=config_ray['metric'], - mode=config_ray['mode'], - resources_per_trial={"cpu": 1, "gpu": 1 if config_ray['device'] == 'cuda' else 0}, - max_concurrent_trials=15, - log_to_file=True, - local_dir=save_path, - trial_name_creator=lambda t: trial_name(t, hps), - trial_dirname_creator=lambda t: trial_name(t, hps), - progress_reporter=reporter, - verbose=1) # resume=True, +# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + +# This program is free software; you can redistribute it and/or modify it under +# the terms of the MIT license. 
+ +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the MIT License for more details. + + +from typing import List +import os +import numpy as np +import yaml +from ray import tune +from ray.tune import run, Experiment + +from agents.common.utils import get_global_name, get_global_agent_name +from agents.common.config import process_glob_config, process_config_per_agent +from sac_main_fn import main as sac_main + +os.environ["Timer"] = '1' + + +def trial_name(trial: Experiment, hp_to_write: List[str]) -> str: + """ + Generate a unique identifier for a trial based on specified hyperparameters and trial information. + + Parameters: + ---------- + trial : ray.tune.Experiment + The Ray Tune Experiment for which to generate the identifier. + hp_to_write : List[str] + List of hyperparameter names to include in the identifier. + + Returns: + ---------- + str + The generated trial identifier. + """ + ti = 'repeat_run' + identifier = ','.join([f'{hp}={trial.config[hp]}' for hp in hp_to_write]) + \ + f',trial={trial.config[ti]},id={trial.trial_id}' + return identifier + + +if __name__ == '__main__': + + envs = ['ball_in_cup'] + agents = [ + 'SAC', + 'SAG', + 'PIG', + 'PAG', + ] + + for env in envs: + # get global name to retrieve configs + glob_name = get_global_name(env) + + # retrieve config + with open(os.path.join(os.getcwd(), 'ray_config', f'{glob_name}_cfg.yaml')) as f: + config = yaml.safe_load(f) + np.random.seed(config['seed']) + del config['seed'] + + # add important elements to the config file + config['orig_cwd'] = os.getcwd() + config['env'] = env + config['glob_name'] = glob_name + config['device'] = 'cpu' + + # process config and retrieve elements for loops + expert_names, dict_pos_tol, dict_beta, dict_delta, dict_phi, decay_parameter_list = process_glob_config(config) + + # loop over experts (if multiple experts) + for expert in expert_names: + + config['expert'] = expert + + # loop over agents (if multiple agents) + for agent_name in agents: + + # agent name + glob_agent_name = get_global_agent_name(agent_name) + config['agent_name'] = agent_name + + # further process hyperparameters to make them dependent on agent + process_config_per_agent(config, agent_name, dict_beta, dict_delta, dict_phi, dict_pos_tol) + + # decay or not (only relevant for PAG) + agent_name_to_show = agent_name + if agent_name in ['SAC', 'SAG', 'NaiveSAG']: + decay_parameter_list = [False] + + for decay_parameter in decay_parameter_list: + + # to avoid unnecessary runs + config['decay_parameter'] = decay_parameter + if decay_parameter: + agent_name_to_show = 'Decreased' + agent_name_to_show + else: + # to avoid unnecessary runs + config['delta'] = [1] + + # ray preparation + hps = [k for k, v in config.items() if type(v) is list] + config_ray = config.copy() + config_ray = {k: tune.grid_search(v) if type(v) is list else v for k, v in config.items()} + config_ray['repeat_run'] = tune.grid_search(list(range(config['repeat_run']))) + metric_columns = ['epoch', 'average_return', 'mean_avg_return', 'epoch_time'] + reporter = tune.CLIReporter(parameter_columns=hps, metric_columns=metric_columns) + env_name_folder = env + if agent_name in ['SAC']: + save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}' + else: + save_path = f'./ray_results_test/{env_name_folder}/{agent_name_to_show}/{expert}' + + analysis = run( + sac_main, + config=config_ray, +
metric=config_ray['metric'], + mode=config_ray['mode'], + resources_per_trial={"cpu": 1, "gpu": 1 if config_ray['device'] == 'cuda' else 0}, + max_concurrent_trials=15, + log_to_file=True, + local_dir=save_path, + trial_name_creator=lambda t: trial_name(t, hps), + trial_dirname_creator=lambda t: trial_name(t, hps), + progress_reporter=reporter, + verbose=1) # resume=True, diff --git a/RLLG/notebooks/Visualization.ipynb b/RLLG/notebooks/Visualization.ipynb index b124a92c..1c4fce5f 100644 --- a/RLLG/notebooks/Visualization.ipynb +++ b/RLLG/notebooks/Visualization.ipynb @@ -1,155 +1,155 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "from ray.tune import ExperimentAnalysis\n", - "\n", - "import sys\n", - "sys.path.append('../')\n", - "\n", - "from helpers import plot_all\n", - "\n", - "import warnings\n", - "warnings.filterwarnings('ignore')" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Import results" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "env = \"ball_in_cup\"\n", - "\n", - "agents = [\n", - " \"SAC\",\n", - " \"SAG\",\n", - " \"PIG\",\n", - " \"PAG\"\n", - "]\n", - "\n", - "experts = [\n", - " \"MediumSAC\",\n", - "]\n", - "\n", - "hps = [\n", - " \"activation_fn\",\n", - " \"betas\",\n", - " \"decay_rate\"\n", - "]\n", - "\n", - "metric = \"mean_avg_return\"\n", - "mode = \"max\"\n", - "n_epochs = 2000" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "plt.figure(figsize=(8, 6))\n", - "\n", - "plot_all(env, \n", - " agents, \n", - " experts, \n", - " rolling_mean=0.05,\n", - " init_path=\"..\", \n", - " hps=hps, \n", - " set_hyperparam={'pos_tol': 1.7},\n", - " chosen_max=2000,\n", - " n_epochs=2000,\n", - " metric=\"mean_avg_return\", \n", - " mode=\"max\", \n", - " to_plot=\"final\")\n", - "\n", - "plt.xlabel(\"epochs (one epoch = one episode = 1000 steps)\")\n", - "plt.ylabel(\"Average return over 5 seeds\")\n", - "plt.title(\"Ball in Cup\")\n", - "\n", - "plt.show()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 1 +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + 
"name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from ray.tune import ExperimentAnalysis\n", + "\n", + "import sys\n", + "sys.path.append('../')\n", + "\n", + "from helpers import plot_all\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Import results" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "env = \"ball_in_cup\"\n", + "\n", + "agents = [\n", + " \"SAC\",\n", + " \"SAG\",\n", + " \"PIG\",\n", + " \"PAG\"\n", + "]\n", + "\n", + "experts = [\n", + " \"MediumSAC\",\n", + "]\n", + "\n", + "hps = [\n", + " \"activation_fn\",\n", + " \"betas\",\n", + " \"decay_rate\"\n", + "]\n", + "\n", + "metric = \"mean_avg_return\"\n", + "mode = \"max\"\n", + "n_epochs = 2000" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 6))\n", + "\n", + "plot_all(env, \n", + " agents, \n", + " experts, \n", + " rolling_mean=0.05,\n", + " init_path=\"..\", \n", + " hps=hps, \n", + " set_hyperparam={'pos_tol': 1.7},\n", + " chosen_max=2000,\n", + " n_epochs=2000,\n", + " metric=\"mean_avg_return\", \n", + " mode=\"max\", \n", + " to_plot=\"final\")\n", + "\n", + "plt.xlabel(\"epochs (one epoch = one episode = 1000 steps)\")\n", + "plt.ylabel(\"Average return over 5 seeds\")\n", + "plt.title(\"Ball in Cup\")\n", + "\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 } \ No newline at end of file diff --git a/RLLG/notebooks/helpers.py b/RLLG/notebooks/helpers.py index efbbff31..fad973a6 100644 --- a/RLLG/notebooks/helpers.py +++ b/RLLG/notebooks/helpers.py @@ -4,28 +4,53 @@ import matplotlib.pyplot as plt from ray.tune import ExperimentAnalysis from scipy.integrate import simps +from typing import Union, Any, Dict, List, Optional, Tuple -def plot_curves(analysis, - hps, - metric, - rolling_mean=0.6, - set_hyperparam={}, - hyperparam_comparison=None, - to_plot="final", - label="SAC", - chosen_max=1000, - n_epochs=2000, - retrieve_auc=False): +def plot_curves(analysis: tune.ray.ExperimentAnalysis, + hps: List[str], + metric: str, + rolling_mean: float = 0.6, + set_hyperparam: Dict[str, Any] = {}, + hyperparam_comparison: Optional[str] = None, + to_plot: Optional[str] = "final", + label: Optional[str] = "SAC", + chosen_max: Optional[int] = 1000, + n_epochs: Optional[int] = 2000, + retrieve_auc: Optional[bool] = False) -> Optional[Dict[str, Union[float, float]]]: + """ - analysis: - tune.ray.ExperimentAnalysis - hps: 
hyperparams to choose - list - metric: - str - to_plot: to plot best final mean or best overall - str: choose between final and overall + Plot learning curves based on the specified analysis, hyperparameters, and metric. + + Parameters: + ---------- + analysis : tune.ray.ExperimentAnalysis + The ray tune analysis object containing information about the experiment + hps : List[str] + List of hyperparameters to choose for plotting + metric : str + The metric to be plotted + rolling_mean : float, optional + The alpha value for exponential weighted moving average (default is 0.6). + set_hyperparam : Dict[str, Any], optional + Dictionary specifying hyperparameters and their values to set during plotting (default is an empty dictionary). + hyperparam_comparison : str, optional + String specifying the hyperparameters to compare during plotting (default is None). + to_plot : str, optional + String specifying whether to plot the best final mean or the best overall (default is "final"). + label : str, optional + Label for the plot (default is "SAC"). + chosen_max : int, optional + The chosen maximum value for the metric (default is 1000). + n_epochs : int, optional + The number of epochs for plotting (default is 2000). + retrieve_auc : bool, optional + Boolean indicating whether to retrieve the area under the curve (AUC) and final performance (default is False). + + Returns: + ---------- + Optional[Dict[str, Union[float, float]]] + A dictionary containing AUC and final performance if retrieve_auc is True, otherwise None. """ group_by = [f'config/{hp}' for hp in hps if hp != 'repeat_run'] + ['epoch'] dfs = analysis.trial_dataframes @@ -154,37 +179,59 @@ def plot_curves(analysis, } -def plot_all(env, - agents, - experts, - rolling_mean=0.6, - set_hyperparam={}, - hyperparam_comparison=None, - init_path="..", - hps=['betas'], - metric="mean_avg_return", - mode="max", - to_plot="final", - chosen_max=1000, - n_epochs=2000, - retrieve_auc=False): +def plot_all(env: str, + agents: List[str], + experts: List[str], + rolling_mean: float = 0.6, + set_hyperparam: Dict[str, Any] = {}, + hyperparam_comparison: Optional[str] = None, + init_path: Optional[str] = "..", + hps: Optional[List[str]] = ['betas'], + metric: Optional[str] = "mean_avg_return", + mode: Optional[str] = "max", + to_plot: Optional[str] = "final", + chosen_max: Optional[int] = 1000, + n_epochs: Optional[int] = 2000, + retrieve_auc: Optional[bool] = False) -> Optional[Dict[str, Union[float, float]]]: """ - env: - str - agents: - list of str - init_path: - str - hps: hyperparams to choose - list - metric: - str - mode: - str - to_plot: to plot best final mean or best overall - str: choose between final and overall - n_epochs: - int + Plot learning curves for multiple agents and experts based on the specified environment. 
+ + Parameters: + ---------- + env : str + The environment for which learning curves will be plotted + agents : List[str] + List of agent names to be included in the plot + experts : List[str] + List of expert names to be included in the plot + rolling_mean : float, optional + The alpha value for exponential weighted moving average (default is 0.6) + set_hyperparam : Dict[str, Any], optional + Dictionary specifying hyperparameters and their values to set during plotting (default is an empty dictionary) + hyperparam_comparison : str, optional + String specifying the hyperparameters to compare during plotting (default is None) + init_path : str, optional + The initial path where ray_results are stored (default is "..") + hps : List[str], optional + List of hyperparameters to choose for plotting (default is ['betas']) + metric : str, optional + The metric to be plotted (default is "mean_avg_return") + mode : str, optional + The mode for selecting the best values (default is "max") + to_plot : str, optional + String specifying whether to plot the best final mean or the best overall (default is "final") + chosen_max : int, optional + The chosen maximum value for the metric (default is 1000) + n_epochs : int, optional + The number of epochs for plotting (default is 2000) + retrieve_auc : bool, optional + Boolean indicating whether to retrieve the area under the curve (AUC) and final performance (default is False) + + Returns: + ---------- + Optional[Dict[str, Union[float, float]]] + A dictionary containing AUC and final performance for each agent-expert combination if retrieve_auc is True, + otherwise None. """ assert to_plot in ["overall", "final"] diff --git a/RLLG/notebooks/video_fn.py b/RLLG/notebooks/video_fn.py index 764c661a..1e307adb 100644 --- a/RLLG/notebooks/video_fn.py +++ b/RLLG/notebooks/video_fn.py @@ -6,9 +6,25 @@ from IPython import display from dm_control.utils import rewards as rewards_fn from dmc2gym.wrappers import _flatten_obs +from typing import Union, Any, Dict, List, Optional, Tuple -def grabFrame(env): +def grabFrame(env: Any) -> np.ndarray: + """ + Capture and return a frame from the dm_control environment rendering. + + Parameters: + ---------- + env : dm_control suite env + The dm control suite environment + + Returns: + ---------- + np.ndarray + A NumPy array representing the RGB frame captured from the environment rendering + + """ # Get RGB rendering of env rgbArr = env.physics.render(480, 600, camera_id=0) # Convert to BGR for use with OpenCV @@ -16,7 +32,23 @@ def grabFrame(env): # Use 'jpeg' instead of 'png' (~5 times faster) -def array_to_image(a, fmt='jpeg'): +def array_to_image(a: np.ndarray, fmt: Optional[str] = 'jpeg') -> display.Image: + """ + Convert a NumPy array to an image and display it using IPython's display module. + + Parameters: + ---------- + a : numpy.ndarray + The input NumPy array representing an image + fmt : str, optional + The image format to use (default is 'jpeg') + + Returns: + ---------- + IPython.display.Image + An IPython Image object representing the displayed image + + """ # Create binary stream object f = BytesIO() @@ -25,8 +57,32 @@ def array_to_image(a, fmt='jpeg'): return display.Image(data=f.getvalue()) +def create_dm_video(env: Any, + policy: str = "random", + verbose: int = 0, + video_name: str = "video.mp4", + not_plot: bool = False) -> None: + """ + Create a video of an episode in a dm_control environment.
-def create_dm_video(env, policy="random", verbose=0, video_name="video.mp4", not_plot=False):
+
+    Parameters:
+    ----------
+    env : Any
+        The dm_control environment
+    policy : str or callable, optional
+        The policy used to generate actions. If "random", random actions are used.
+        If a callable, it should take an observation and return an action.
+    verbose : int, optional
+        Verbosity level. If greater than 0, print additional information during video creation.
+    video_name : str, optional
+        The name of the output video file (default is "video.mp4").
+    not_plot : bool, optional
+        If True, do not plot the video while it is being created (default is False).
+
+    Returns:
+    ----------
+    None
+    """
     frame = grabFrame(env)
     height, width, layers = frame.shape
     if not not_plot:
@@ -77,7 +133,23 @@ def create_dm_video(env, policy="random", verbose=0, video_name="video.mp4", not
     video.release()
 
 
-def plot_video(d, d2, video_name="video.mp4"):
+def plot_video(d: Dict, d2: Dict, video_name: str = "video.mp4") -> None:
+    """
+    Plot a video using d and d2 for display.
+
+    Parameters:
+    ----------
+    d : Dict
+        The dictionary used to update the video frames.
+    d2 : Dict
+        The dictionary used to update the display with additional information (e.g., FPS).
+    video_name : str, optional
+        The name of the input video file (default is "video.mp4").
+
+    Returns:
+    ----------
+    None
+    """
     cap = cv2.VideoCapture(video_name)
     while (cap.isOpened()):
         t1 = time.time()
@@ -93,13 +165,38 @@ def plot_video(d, d2, video_name="video.mp4"):
     cap.release()
 
 
-def plot_total_video(env, d, d2, policy="random", verbose=0, video_name="video.mp4", not_plot=False):
-    """
-    Note this function requires d and d2. They must be created with the following in the Jupyter Notebook:
-    >>> d = display.display("", display_id=1)
-    >>> d2 = display.display("", display_id=2)
+def plot_total_video(env: Any,
+                     d: dict,
+                     d2: dict,
+                     policy: str = "random",
+                     verbose: int = 0,
+                     video_name: str = "video.mp4",
+                     not_plot: bool = False) -> None:
     """
+    Plot a total video using d and d2 for display.
+
+    Note this function requires d and d2. They must be created in the Jupyter Notebook with:
+    >>> d = display.display("", display_id=1)
+    >>> d2 = display.display("", display_id=2)
+
+    Parameters:
+    ----------
+    env : Any
+        The dm_control environment.
+    d : dict
+        The dictionary used to update the video frames.
+    d2 : dict
+        The dictionary used to update the display with additional information (e.g., FPS)
+    policy : str or callable, optional
+        The policy used to generate actions, forwarded to create_dm_video (default is "random")
+    verbose : int, optional
+        Verbosity level (default is 0).
+ video_name : str, optional + The name of the output video file (default is "video.mp4") + not_plot : bool, optional + If True, do not plot the video (default is False) + Returns: + ---------- + None + + """ create_dm_video(env, policy=policy, verbose=verbose, video_name=video_name, not_plot=not_plot) if not not_plot: - plot_video(d, d2, video_name=video_name) \ No newline at end of file + plot_video(d, d2, video_name=video_name) diff --git a/RLLG/ray_config/ball_in_cup_cfg.yaml b/RLLG/ray_config/ball_in_cup_cfg.yaml index 0b6407b3..b21eddca 100644 --- a/RLLG/ray_config/ball_in_cup_cfg.yaml +++ b/RLLG/ray_config/ball_in_cup_cfg.yaml @@ -1,65 +1,65 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 100000 -seed: 42 -network_arch: '64-64' -policy_arch: '64-64' -qf_arch: '64-64' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 1000 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 0.005 # 5e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'relu' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# Local experts variables -local_experts: - - 'MediumSAC' - -beta: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 1.0 ] # [ 0.5, 1, 2, 5 ] - PAG: [ 0.0 ] - -decay_parameter: - - False -delta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.9 ] # [ 0.5, 0.8, 0.9 ] - PAG: [ 1.0 ] - -phi: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.0 ] - PAG: [ 0.8 ] # [ 0.5, 0.8, 1.0, 1.5 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 50 +# RL env and common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 100000 +seed: 42 +network_arch: '64-64' +policy_arch: '64-64' +qf_arch: '64-64' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 1 # 1000 +n_initial_env_steps: 10 +n_env_steps_per_epoch: 10 # 1000 +n_train_step_per_epoch: 10 # 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 0.005 # 5e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'relu' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True +expert_alpha_multiplier: 1.0 + +# Local experts variables +local_experts: + - 'MediumSAC' + +beta: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 1.0 ] # [ 0.5, 1, 2, 5 ] + PAG: [ 0.0 ] + +decay_parameter: + - False +delta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.9 ] # [ 0.5, 0.8, 0.9 ] + PAG: [ 1.0 ] + +phi: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.0 ] + PAG: [ 0.8 ] # [ 0.5, 0.8, 1.0, 1.5 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 50 diff --git a/RLLG/ray_config/bullet_small_reach_cfg.yaml b/RLLG/ray_config/bullet_small_reach_cfg.yaml index d61282e5..20b4c76f 100644 --- a/RLLG/ray_config/bullet_small_reach_cfg.yaml +++ b/RLLG/ray_config/bullet_small_reach_cfg.yaml @@ -1,71 +1,71 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 1000000 -seed: 42 
-network_arch: '64-64' -policy_arch: '64-64' -qf_arch: '64-64' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 2001 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 0.005 # 5e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'relu' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# Local experts variables -local_experts: - - 'SafeScripted' - -pos_tol_choices: - SAC: [ 2.0 ] - SAG: [ 2.0 ] - PIG: [ 2.0 ] - PAG: [ 2.0 ] - -beta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.5, 1, 2, 5 ] - PAG: [ 1.0 ] - -decay_parameter: - - False -delta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 1.0 ] - PAG: [ 0.5, 0.9 ] - -phi: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 1.0 ] - PAG: [ 0.2, 0.6, 0.8 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 10000 +# RL env and common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 1000000 +seed: 42 +network_arch: '64-64' +policy_arch: '64-64' +qf_arch: '64-64' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 2001 +n_initial_env_steps: 1000 +n_env_steps_per_epoch: 1000 +n_train_step_per_epoch: 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 0.005 # 5e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'relu' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True +expert_alpha_multiplier: 1.0 + +# Local experts variables +local_experts: + - 'SafeScripted' + +pos_tol_choices: + SAC: [ 2.0 ] + SAG: [ 2.0 ] + PIG: [ 2.0 ] + PAG: [ 2.0 ] + +beta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.5, 1, 2, 5 ] + PAG: [ 1.0 ] + +decay_parameter: + - False +delta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 1.0 ] + PAG: [ 0.5, 0.9 ] + +phi: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 1.0 ] + PAG: [ 0.2, 0.6, 0.8 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 10000 diff --git a/RLLG/ray_config/cartpole_cfg.yaml b/RLLG/ray_config/cartpole_cfg.yaml index 5c52434a..6e87228c 100644 --- a/RLLG/ray_config/cartpole_cfg.yaml +++ b/RLLG/ray_config/cartpole_cfg.yaml @@ -1,81 +1,81 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 1000000 -seed: 42 -network_arch: '32-32' -policy_arch: '32-32' -qf_arch: '32-32' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 2001 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 0.005 # 5e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'tanh' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# safe cartpole hyperparms 
-limit_cart: 1.9 -reward_end: - - 1000 - -# SAC changes -use_success_buffer: False -ratio_success: - - 0.0 - -# Local experts variables -local_experts: - - 'SafeScripted' - -pos_tol_choices: - SAC: [ 1.7 ] - SAG: [ 1.7 ] - PIG: [ 1.7 ] - PAG: [ 1.7 ] - -beta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.5, 1, 2, 5 ] - PAG: [ 1.0 ] - -decay_parameter: - - True -decay_rate: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 1.0 ] - PAG: [ 0.7, 0.8, 0.9 ] - -phi: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 1.0 ] - PAG: [ 0.5, 0.7, 0.9 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 1000 +# RL env and common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 1000000 +seed: 42 +network_arch: '32-32' +policy_arch: '32-32' +qf_arch: '32-32' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 2001 +n_initial_env_steps: 1000 +n_env_steps_per_epoch: 1000 +n_train_step_per_epoch: 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 0.005 # 5e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'tanh' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True +expert_alpha_multiplier: 1.0 + +# safe cartpole hyperparms +limit_cart: 1.9 +reward_end: + - 1000 + +# SAC changes +use_success_buffer: False +ratio_success: + - 0.0 + +# Local experts variables +local_experts: + - 'SafeScripted' + +pos_tol_choices: + SAC: [ 1.7 ] + SAG: [ 1.7 ] + PIG: [ 1.7 ] + PAG: [ 1.7 ] + +beta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.5, 1, 2, 5 ] + PAG: [ 1.0 ] + +decay_parameter: + - True +decay_rate: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 1.0 ] + PAG: [ 0.7, 0.8, 0.9 ] + +phi: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 1.0 ] + PAG: [ 0.5, 0.7, 0.9 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 1000 diff --git a/RLLG/ray_config/hirl_point_fall_cfg.yaml b/RLLG/ray_config/hirl_point_fall_cfg.yaml index 808c6cc7..61075770 100644 --- a/RLLG/ray_config/hirl_point_fall_cfg.yaml +++ b/RLLG/ray_config/hirl_point_fall_cfg.yaml @@ -1,66 +1,66 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 1000000 -seed: 42 -network_arch: '32-32' -policy_arch: '32-32' -qf_arch: '32-32' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 2001 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 5.0e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'relu' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# Local experts variables -local_experts: - - 'MediumSAC' - -beta: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.5, 1, 2, 5 ] - PAG: [ 0.0 ] - -decay_parameter: - - False -delta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.5, 0.8, 0.9 ] - PAG: [ 1.0 ] - -phi: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.0 ] - PAG: [ 0.5, 0.8, 1.0, 1.5 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 50 - +# RL env and 
common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 1000000 +seed: 42 +network_arch: '32-32' +policy_arch: '32-32' +qf_arch: '32-32' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 2001 +n_initial_env_steps: 1000 +n_env_steps_per_epoch: 1000 +n_train_step_per_epoch: 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 5.0e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'relu' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True +expert_alpha_multiplier: 1.0 + +# Local experts variables +local_experts: + - 'MediumSAC' + +beta: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.5, 1, 2, 5 ] + PAG: [ 0.0 ] + +decay_parameter: + - False +delta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.5, 0.8, 0.9 ] + PAG: [ 1.0 ] + +phi: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.0 ] + PAG: [ 0.5, 0.8, 1.0, 1.5 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 50 + diff --git a/RLLG/ray_config/point_circle_cfg.yaml b/RLLG/ray_config/point_circle_cfg.yaml index 1ff41c11..c8768b9c 100644 --- a/RLLG/ray_config/point_circle_cfg.yaml +++ b/RLLG/ray_config/point_circle_cfg.yaml @@ -1,71 +1,71 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 100000 -seed: 42 -network_arch: '64-64' -policy_arch: '64-64' -qf_arch: '64-64' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 2001 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 0.005 # 5e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'relu' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# Local experts variables -local_experts: - - 'SafeScripted' - -pos_tol_choices: - SAC: [ 0.5 ] - SAG: [ 0.5 ] - PIG: [ 0.5 ] - PAG: [ 0.5 ] - -beta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.5, 1, 2, 5 ] - PAG: [ 1.0 ] - -decay_parameter: - - True -delta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 1.0 ] - PAG: [ 0.5, 0.8, 0.9 ] - -phi: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.0 ] - PAG: [ 0.1, 0.3, 0.5, 1.0 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 5000 +# RL env and common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 100000 +seed: 42 +network_arch: '64-64' +policy_arch: '64-64' +qf_arch: '64-64' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 2001 +n_initial_env_steps: 1000 +n_env_steps_per_epoch: 1000 +n_train_step_per_epoch: 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 0.005 # 5e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'relu' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True 
+expert_alpha_multiplier: 1.0 + +# Local experts variables +local_experts: + - 'SafeScripted' + +pos_tol_choices: + SAC: [ 0.5 ] + SAG: [ 0.5 ] + PIG: [ 0.5 ] + PAG: [ 0.5 ] + +beta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.5, 1, 2, 5 ] + PAG: [ 1.0 ] + +decay_parameter: + - True +delta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 1.0 ] + PAG: [ 0.5, 0.8, 0.9 ] + +phi: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.0 ] + PAG: [ 0.1, 0.3, 0.5, 1.0 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 5000 diff --git a/RLLG/ray_config/point_mass_cfg.yaml b/RLLG/ray_config/point_mass_cfg.yaml index a1144843..c6d8833c 100644 --- a/RLLG/ray_config/point_mass_cfg.yaml +++ b/RLLG/ray_config/point_mass_cfg.yaml @@ -1,66 +1,66 @@ -# RL env and common variables -repeat_run: 5 -max_traj_length: 1000 -replay_buffer_size: 100000 -seed: 42 -network_arch: '64-64' -policy_arch: '64-64' -qf_arch: '64-64' -policy_log_std_multiplier: 1.0 -policy_log_std_offset: -1.0 -n_epochs: 1000 -n_initial_env_steps: 1000 -n_env_steps_per_epoch: 1000 -n_train_step_per_epoch: 1000 -eval_period: 1 -eval_n_trajs: 5 -batch_size: 256 -discount: 0.99 -use_automatic_entropy_tuning: True -alpha_multiplier: 1.0 -backup_entropy: True -target_entropy: 0.0 -policy_lr: 3.0e-4 -qf_lr: 3.0e-4 -optimizer_type: 'adam' -soft_target_update_rate: 0.005 # 5e-3 -target_update_period: 1 - -# hyperparams for stabilization -activation_fn: 'relu' - -# for improved switched sac -use_automatic_entropy_tuning_parametrized_perturbation: True -expert_alpha_multiplier: 1.0 - -# Local experts variables -local_experts: - - 'MediumSAC' - -beta: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.5, 1, 2, 5 ] - PAG: [ 0.0 ] - - -decay_parameter: - - False -delta: - SAC: [ 1.0 ] - SAG: [ 1.0 ] - PIG: [ 0.5, 0.8, 0.9 ] - PAG: [ 1.0 ] - -phi: - SAC: [ 0.0 ] - SAG: [ 0.0 ] - PIG: [ 0.0 ] - PAG: [ 0.5, 0.8, 1.0, 1.5 ] - -# Ray variables -metric: 'mean_avg_return' -mode: 'max' - -# Save policy -num_epoch_save: 10000 +# RL env and common variables +repeat_run: 5 +max_traj_length: 1000 +replay_buffer_size: 100000 +seed: 42 +network_arch: '64-64' +policy_arch: '64-64' +qf_arch: '64-64' +policy_log_std_multiplier: 1.0 +policy_log_std_offset: -1.0 +n_epochs: 1000 +n_initial_env_steps: 1000 +n_env_steps_per_epoch: 1000 +n_train_step_per_epoch: 1000 +eval_period: 1 +eval_n_trajs: 5 +batch_size: 256 +discount: 0.99 +use_automatic_entropy_tuning: True +alpha_multiplier: 1.0 +backup_entropy: True +target_entropy: 0.0 +policy_lr: 3.0e-4 +qf_lr: 3.0e-4 +optimizer_type: 'adam' +soft_target_update_rate: 0.005 # 5e-3 +target_update_period: 1 + +# hyperparams for stabilization +activation_fn: 'relu' + +# for improved switched sac +use_automatic_entropy_tuning_parametrized_perturbation: True +expert_alpha_multiplier: 1.0 + +# Local experts variables +local_experts: + - 'MediumSAC' + +beta: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.5, 1, 2, 5 ] + PAG: [ 0.0 ] + + +decay_parameter: + - False +delta: + SAC: [ 1.0 ] + SAG: [ 1.0 ] + PIG: [ 0.5, 0.8, 0.9 ] + PAG: [ 1.0 ] + +phi: + SAC: [ 0.0 ] + SAG: [ 0.0 ] + PIG: [ 0.0 ] + PAG: [ 0.5, 0.8, 1.0, 1.5 ] + +# Ray variables +metric: 'mean_avg_return' +mode: 'max' + +# Save policy +num_epoch_save: 10000 diff --git a/RLLG/requirements.txt b/RLLG/requirements.txt new file mode 100644 index 00000000..4725dc4d --- /dev/null +++ b/RLLG/requirements.txt @@ -0,0 +1,23 @@ +setuptools==65.5.0 +wheel==0.38.0 +numpy==1.23.1 +torch==1.10.2 +tensorboardX==2.4.1 +mujoco-py==2.1.2.14 +omegaconf==2.1.1 +protobuf==3.20.0 +# Install gym 
by hand. Works with traditional pip install -e . inside a conda env,
+# but causes problems with the docker build otherwise.
+gym==0.21.0
+ray[tune]==1.9.2
+pyyaml
+matplotlib
+ipython
+pandas
+jupyter
+ml-collections
+scipy
+# Install dmc2gym by hand, with git clone git+https://github.com/denisyarats/dmc2gym.git and
+# pip install -e .
+# 'dmc2gym @ git+https://github.com/denisyarats/dmc2gym.git'
\ No newline at end of file
diff --git a/RLLG/sac_main_fn.py b/RLLG/sac_main_fn.py
index 1bd8d790..03c8c29c 100644
--- a/RLLG/sac_main_fn.py
+++ b/RLLG/sac_main_fn.py
@@ -8,7 +8,7 @@
 # PARTICULAR PURPOSE. See the MIT License for more details.
 
 
-
+from typing import Union, Any, Dict, List, Optional, Tuple
 from copy import deepcopy
 import torch
 from omegaconf import DictConfig
@@ -17,27 +17,48 @@
 import numpy as np
 
 # agents
-from agents.algos.sac import SAC
-from agents.algos.sag import SAG
-from agents.algos.pag import PAG
-from agents.algos.pig import PIG
 from agents.common.model import TanhGaussianPolicy, ParametrizedPerturbationTanhGaussianPolicy, FullyConnectedQFunction, \
     SamplerPolicy, ExpertSamplerPolicy
 from agents.common.replay_buffer import ReplayBuffer, batch_to_torch
 from agents.common.sampler import StepSampler, TrajSampler
 from agents.common.utils import Timer, set_random_seed, prefix_metrics
+from agents.common.creation_utils import create_envs, create_agent
 from envs.creation import get_env_and_control
 from envs.confidence import global_lambda_s
 
-dict_agents = {
-    'SAC': SAC,
-    'SAG': SAG,
-    'PIG': PIG,
-    'PAG': PAG,
-}
 
-def save_all_models(qf1, qf2, target_qf1, target_qf2, policy, path):
+def save_all_models(qf1: torch.nn.Module,
+                    qf2: torch.nn.Module,
+                    target_qf1: torch.nn.Module,
+                    target_qf2: torch.nn.Module,
+                    policy: torch.nn.Module,
+                    path: Union[str, os.PathLike]) -> None:
+    """
+    Save the state dictionaries of the different networks the agent uses to a specific path.
+
+    Parameters:
+    ----------
+    qf1 : torch.nn.Module
+        Critic 1
+    qf2 : torch.nn.Module
+        Critic 2
+    target_qf1 : torch.nn.Module
+        Target Critic 1
+    target_qf2 : torch.nn.Module
+        Target Critic 2
+    policy : torch.nn.Module
+        Policy
+    path : Union[str, os.PathLike]
+        The path where the model state dictionaries will be saved
+
+    Returns:
+    ----------
+    None
+        The function does not return anything.
+    """
     torch.save(qf1.state_dict(), os.path.join(path, 'qf1'))
     torch.save(qf2.state_dict(), os.path.join(path, 'qf2'))
     torch.save(target_qf1.state_dict(), os.path.join(path, 'target_qf1'))
@@ -45,7 +66,36 @@ def save_all_models(qf1, qf2, target_qf1, target_qf2, policy, path):
     torch.save(policy.state_dict(), os.path.join(path, 'policy'))
 
 
-def load_all_models(qf1, qf2, target_qf1, target_qf2, policy, path):
+def load_all_models(qf1: torch.nn.Module,
+                    qf2: torch.nn.Module,
+                    target_qf1: torch.nn.Module,
+                    target_qf2: torch.nn.Module,
+                    policy: torch.nn.Module,
+                    path: Union[str, os.PathLike]) -> None:
+    """
+    Load the state dictionaries of the different networks the agent uses from a specific path.
+
+    Parameters:
+    ----------
+    qf1 : torch.nn.Module
+        Critic 1
+    qf2 : torch.nn.Module
+        Critic 2
+    target_qf1 : torch.nn.Module
+        Target Critic 1
+    target_qf2 : torch.nn.Module
+        Target Critic 2
+    policy : torch.nn.Module
+        Policy
+    path : Union[str, os.PathLike]
+        The path from which the model state dictionaries will be loaded.
+
+    Returns:
+    ----------
+    None
+        The function does not return anything.
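+
+    Examples:
+    ----------
+    A hypothetical round trip (the checkpoint directory name is illustrative only):
+
+    >>> save_all_models(qf1, qf2, target_qf1, target_qf2, policy, path='./checkpoints/run_0')
+    >>> load_all_models(qf1, qf2, target_qf1, target_qf2, policy, path='./checkpoints/run_0')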
+ """ qf1.load_state_dict(torch.load(os.path.join(path, 'qf1'))) qf2.load_state_dict(torch.load(os.path.join(path, 'qf2'))) target_qf1.load_state_dict(torch.load(os.path.join(path, 'target_qf1'))) @@ -53,7 +103,20 @@ def load_all_models(qf1, qf2, target_qf1, target_qf2, policy, path): policy.load_state_dict(torch.load(os.path.join(path, 'policy'))) -def main(cfg): +def main(cfg: Dict) -> None: + """ + Main function to train an RL agent using Ray Tune. + + Parameters: + ---------- + cfg : Dict + The configuration dictionary + + Returns: + ---------- + None + The function runs the training process and reports metrics to Ray Tune. + """ cfg = DictConfig(cfg) # global hyperparameters @@ -61,47 +124,30 @@ def main(cfg): glob_name = cfg['glob_name'] num_run = cfg['repeat_run'] - # environment parameters - limit_cart = None - reward_end = None + # create envs and retrieve local controls + env_train, local_control_dict_train, env_test, local_control_dict_test = create_envs(cfg) + + # retrieve local experts and their confidence function + expert = cfg['expert'] pos_tol = None - speed_tol = None - if 'limit_cart' in cfg: - limit_cart = cfg['limit_cart'] - if 'reward_end' in cfg: - reward_end = cfg['reward_end'] if 'pos_tol' in cfg: pos_tol = cfg['pos_tol'] - env_train, local_control_dict_train = get_env_and_control(name=cfg['env'], - orig_cwd=cfg['orig_cwd'], - device=cfg['device'], - limit_cart=limit_cart, - reward_end=reward_end, - pos_tol=pos_tol - ) - env_test, local_control_dict_test = get_env_and_control(name=cfg['env'], - orig_cwd=cfg['orig_cwd'], - device=cfg['device'], - limit_cart=limit_cart, - reward_end=reward_end, - pos_tol=pos_tol - ) - - # experts - expert = cfg['expert'] lambda_s = global_lambda_s(cfg['glob_name'], expert, device=cfg['device'], - pos_tol=pos_tol, - speed_tol=speed_tol + pos_tol=pos_tol ) local_expert = local_control_dict_train[expert]['local_expert'] + # Create samplers train_sampler = StepSampler(env_train, cfg['max_traj_length']) # .unwrapped eval_sampler = TrajSampler(env_test, cfg['max_traj_length']) # .unwrapped + + # Create replay buffer replay_buffer = ReplayBuffer(cfg['replay_buffer_size']) set_random_seed(cfg["repeat_run"]) + # Create relevant networks (Critics, Target Critics, Perturbations, Policies) policy = TanhGaussianPolicy( eval_sampler.env.observation_space.shape[0], eval_sampler.env.action_space.shape[0], @@ -143,47 +189,18 @@ def main(cfg): cfg['target_entropy'] = -np.prod(eval_sampler.env.action_space.shape).item() # Get agent - if cfg['agent_name'] == 'SAC': - agent = dict_agents[agent_name](cfg, - policy, - sampler_policy, - qf1, - qf2, - target_qf1, - target_qf2) - elif cfg['agent_name'] == 'SAG': - agent = dict_agents[agent_name](cfg, - policy, - sampler_policy, - qf1, - qf2, - target_qf1, - target_qf2, - use_local=lambda_s, - local_expert=local_expert) - elif cfg['agent_name'] == 'PIG': - agent = dict_agents[agent_name](cfg, - policy, - sampler_policy, - qf1, - qf2, - target_qf1, - target_qf2, - use_local=lambda_s, - local_expert=local_expert, - beta=cfg['beta']) - else: - agent = dict_agents[agent_name](cfg, - policy, - sampler_policy, - qf1, - qf2, - target_qf1, - target_qf2, - use_local=lambda_s, - local_expert=local_expert, - parametrized_perturbation=parametrized_perturbation, - sampler_parametrized_perturbation=sampler_parametrized_perturbation) + agent = create_agent(cfg, + agent_name, + policy, + sampler_policy, + qf1, + qf2, + target_qf1, + target_qf2, + lambda_s, + local_expert, + parametrized_perturbation, + 
sampler_parametrized_perturbation)
 
     agent.torch_to_device(cfg['device'])
 
     # put beta right if PAG without decay parameter
@@ -253,6 +270,7 @@ def main(cfg):
 
         if agent_name in ['PIG', 'PAG']:
             metrics[f'beta'] = agent.beta
 
+        # Report metrics to ray tune
         if epoch == 0 or (epoch + 1) % cfg['eval_period'] == 0 or epoch == cfg['n_epochs'] - 1:
             metrics['epoch'] = epoch
             metrics['rollout_time'] = rollout_timer()
@@ -263,6 +281,7 @@ def main(cfg):
             # Report metrics
             tune.report(**metrics)
 
+        # Save agent policy if required
         if epoch % cfg['num_epoch_save'] == 0 and cfg['agent_name'] == 'SAC' and epoch > 0:
             act_fn = cfg['activation_fn']
             save_path_init = os.path.join(cfg['orig_cwd'],
diff --git a/RLLG/setup.py b/RLLG/setup.py
index aeb2f3e5..7369c5b9 100644
--- a/RLLG/setup.py
+++ b/RLLG/setup.py
@@ -1,27 +1,35 @@
-# Created by Paul Daoudi
-# Date: 11/02/2023
-
-from setuptools import setup
-
-setup(author='Paul Daoudi',
-      name='rllg',
-      version='0.1.0',
-      install_requires=[
-          'setuptools',
-          'numpy==1.23.1',
-          'torch==1.10.2',
-          'tensorboardX==2.4.1',
-          'mujoco-py==2.1.2.14',
-          'omegaconf==2.1.1',
-          'gym==0.21.0',
-          'ray[tune]',
-          'pyyaml',
-          'matplotlib',
-          'ipython',
-          'pandas',
-          'matplotlib',
-          'jupyter',
-          'ml-collections',
-          'scipy'
-      ]
-)
\ No newline at end of file
+# Created by Paul Daoudi
+# Date: 11/02/2023
+
+from setuptools import setup, find_packages
+
+setup(author='Paul Daoudi',
+      name='rllg',
+      version='0.1.0',
+      packages=find_packages(),
+      install_requires=[
+          'setuptools==65.5.0',
+          'wheel==0.38.0',
+          'numpy==1.23.1',
+          'torch==1.10.2',
+          'tensorboardX==2.4.1',
+          'mujoco-py==2.1.2.14',
+          'omegaconf==2.1.1',
+          'protobuf==3.20.0',
+          # Install gym by hand. Works with traditional pip install -e . inside a conda env,
+          # but causes problems with the docker build otherwise.
+          # 'gym==0.21.0',
+          'ray[tune]==1.9.2',
+          'pyyaml',
+          'matplotlib',
+          'ipython',
+          'pandas',
+          'jupyter',
+          'ml-collections',
+          'scipy',
+          # Install dmc2gym by hand, with git clone git+https://github.com/denisyarats/dmc2gym.git and
+          # pip install -e .
+          'dmc2gym @ git+https://github.com/denisyarats/dmc2gym.git'
+      ]
+)
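For reference, sac_main_fn.main() above expects a flat config dictionary and reports its metrics through ray.tune. The snippet below is a hedged sketch of how such a config could be assembled and launched; it is not the repository's own launcher. The keys mirror the ray_config YAML files and the cfg[...] accesses in main(), but details such as the glob_name value and the flattening of the per-agent hyperparameter grids are assumptions.

# Hedged sketch only: illustrates the shape of the config main() reads and how it plugs into ray.tune.
import os
import yaml
from ray import tune
from sac_main_fn import main

with open(os.path.join('ray_config', 'ball_in_cup_cfg.yaml')) as f:
    cfg = yaml.safe_load(f)              # common hyperparameters from the YAML config

cfg.update({
    'env': 'ball_in_cup',                # environment name (assumed to match the config file)
    'glob_name': 'ball_in_cup',          # assumption: key used for the confidence-function lookup
    'agent_name': 'SAC',                 # one of SAC, SAG, PIG, PAG
    'expert': 'MediumSAC',               # taken from local_experts in the YAML
    'device': 'cpu',
    'orig_cwd': os.getcwd(),
})

# Per-agent hyperparameters (beta, delta, phi) are grid dictionaries in the YAML; they would
# normally be flattened to scalars or wrapped in tune.grid_search before launching.
for key in ('beta', 'delta', 'phi'):
    cfg[key] = cfg[key]['SAC'][0]

tune.run(main, config=cfg, metric=cfg['metric'], mode=cfg['mode'], num_samples=1)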