Merge pull request Grid2op#230 from BDonnot/bd_dev
Some more fixes for the alarm feature
BDonnot authored Jun 18, 2021
2 parents e19cb75 + 318ef31 commit 6de94f2
Showing 15 changed files with 174 additions and 29 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -313,6 +313,9 @@ issue_208_res/
test_issue_208.py
test_issue_220.py
test_issue_*.py
res_alert/
env_debug_time_last_alarm_inconsistency.zip
env_debug_time_last_alarm_inconsistency/

# profiling files
**.prof
9 changes: 9 additions & 0 deletions CHANGELOG.rst
@@ -33,6 +33,9 @@ Change Log
- [BREAKING] the "Runner.py" file has been renamed, following pep convention "runner.py". You should rename your
import `from grid2op.Runner.Runner import Runner` to `from grid2op.Runner.runner import Runner`
(**NB** we highly recommend importing the `Runner` like `from grid2op.Runner import Runner` though!)
- [FIXED]: the L2RPN_2020 score has been updated to reflect the score used during these competitions (there was an
error between `DoNothingAgent` and `RecoPowerlineAgent`)
[see `Issue#228 <https://github.com/rte-france/Grid2Op/issues/228>`_ ]
- [FIXED]: some bugs in the `action_space.get_all_unitary_redispatch` and `action_space.get_all_unitary_curtail`
- [FIXED]: some bugs in the `GreedyAgent` and `TopologyGreedy`
- [FIXED]: `Issue#220 <https://github.com/rte-france/Grid2Op/issues/220>`_ `flow_bus_matrix` did not take into
@@ -41,6 +44,12 @@ Change Log
if there is nothing controllable in grid2op present in it.
- [FIXED]: an issue where the parameters would not be completely saved when saved in json format (alarm feature was
absent) (related to `Issue#224 <https://github.com/rte-france/Grid2Op/issues/224>`_ )
- [FIXED]: an error caused by the observation not being copied when a game over occurred, which caused issues in
some cases (related to `Issue#226 <https://github.com/rte-france/Grid2Op/issues/226>`_ )
- [FIXED]: a bug in the opponent space where the `previous_fails` kwarg was not updated properly and was sent
incorrectly to the opponent
- [FIXED]: a bug in the geometric opponent when it performed an attack that failed.
- [FIXED]: `Issue#229 <https://github.com/rte-france/Grid2Op/issues/229>`_ a typo in the `AlarmReward` class when it is reset.
- [ADDED] support for the "alarm operator" / "attention budget" feature
- [ADDED] retrieval of the `max_step` (i.e. the maximum number of steps that can be performed in the current episode)
in the observation
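For the two [ADDED] entries above, a minimal sketch of how they surface to users, assuming a test environment and the attribute name `max_step` from the changelog:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
obs = env.reset()
# maximum number of steps that can be performed in the current episode
print(obs.max_step)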
6 changes: 3 additions & 3 deletions docs/environment.rst
@@ -564,7 +564,7 @@ You can then use, in the above case:
import grid2op
env_name = "l2rpn_case14_sandbox" # matching above
env_train = grid2op.make(env_name+"_train") to only use the "training chronics"
env_train = grid2op.make(env_name+"_train") # to only use the "training chronics"
# do whatever you want with env_train
And then, at time of validation:
@@ -574,8 +574,8 @@
import grid2op
env_name = "l2rpn_case14_sandbox" # matching above
env_train = grid2op.make(env_name+"_val") to only use the "training chronics"
# do whatever you want with env_train
env_val = grid2op.make(env_name+"_val") # to only use the "validation chronics"
# do whatever you want with env_val
As of now, grid2op does not support "from the API" the possibility to split with convenient
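For context, the two corrected snippets above belong to a split-then-validate workflow; a sketch of the whole round trip, assuming the `train_val_split_random` helper available in recent grid2op versions:

import grid2op

env_name = "l2rpn_case14_sandbox"
env = grid2op.make(env_name)
# writes env_name+"_train" and env_name+"_val" to disk, keeping ~10% of the chronics for validation
nm_env_train, nm_env_val = env.train_val_split_random(pct_val=10.)

env_train = grid2op.make(nm_env_train)  # training chronics only
env_val = grid2op.make(nm_env_val)      # validation chronics only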
2 changes: 1 addition & 1 deletion docs/modeled_elements.rst
@@ -1,6 +1,6 @@
.. |5subs_grid_layout| image:: ./img/5subs_grid_layout.jpg
.. |5subs_grid_5_sub1_graph| image:: ./img/5subs_grid_5_sub1_graph.jpg
.. |5subs_grid_all_1| image:: ./img/5subs_grid_all_1jpg
.. |5subs_grid_all_1| image:: ./img/5subs_grid_all_1.jpg
.. |5subs_grid_5_sub1_2_graph| image:: ./img/5subs_grid_5_sub1_2_graph.jpg

.. _modeled-elements-module:
13 changes: 11 additions & 2 deletions grid2op/Environment/BaseEnv.py
@@ -1432,7 +1432,7 @@ def _handle_updown_times(self, gen_up_before, redisp_act):
self._gen_downtime[gen_still_disconnected] += 1
return except_

def get_obs(self):
def get_obs(self, _update_state=True):
"""
Return the observation of the current environment, as seen by the :class:`grid2op.BaseAgent.BaseAgent`.
@@ -1461,7 +1461,7 @@ def get_obs(self):
# obs2 and obs are identical.
"""
res = self._observation_space(env=self)
res = self._observation_space(env=self, _update_state=_update_state)
return res

def get_thermal_limit(self):
@@ -1662,6 +1662,14 @@ def step(self, action):
action = agent.act(obs, reward, done)
obs, reward, done, info = env.step(action)
Notes
-----
If the flag `done=True` is raised (*i.e.* this is the end of the episode), then the observation is NOT properly
updated and should not be used at all.
Instead, it will be in a "game over" state (see :class:`grid2op.Observation.BaseObservation.set_game_over`).
"""

if not self.__is_init:
@@ -1938,6 +1946,7 @@ def step(self, action):
# forward to the observation if an alarm is used or not
if hasattr(self._reward_helper.template_reward, "has_alarm_component"):
self._is_alarm_used_in_reward = self._reward_helper.template_reward.is_alarm_used
self.current_obs = self.get_obs(_update_state=False)
# update the observation so when it's plotted everything is "shutdown"
self.current_obs.set_game_over(self)

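The Notes added to `step` above boil down to: never act on the observation returned together with `done=True`. A minimal sketch of the pattern, assuming a standard test environment:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
obs = env.reset()
done = False
while not done:
    action = env.action_space()  # "do nothing" action
    obs, reward, done, info = env.step(action)
# here obs is in a "game over" state (set_game_over was called on it):
# it can be plotted or logged, but it is not a valid state to act upon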
9 changes: 5 additions & 4 deletions grid2op/Observation/ObservationSpace.py
@@ -180,16 +180,17 @@ def reset_space(self):
self.obs_env.reset_space()
self.action_helper_env.actionClass.reset_space()

def __call__(self, env):
def __call__(self, env, _update_state=True):
if self.with_forecast:
self.obs_env.update_grid(env)

res = self.observationClass(obs_env=self.obs_env,
action_helper=self.action_helper_env)

# TODO how to make sure that whatever the number of time i call "simulate" i still get the same observations
# TODO use self.obs_prng when updating actions
res.update(env=env, with_forecast=self.with_forecast)
if _update_state:
# TODO how to make sure that whatever the number of times I call "simulate" I still get the same observations
# TODO use self.obs_prng when updating actions
res.update(env=env, with_forecast=self.with_forecast)
return res

def size_obs(self):
2 changes: 1 addition & 1 deletion grid2op/Observation/_ObsEnv.py
@@ -444,7 +444,7 @@ def simulate(self, action):
obs, reward, done, info = self.step(action)
return obs, reward, done, info

def get_obs(self):
def get_obs(self, _update_state=False):
"""
INTERNAL
8 changes: 6 additions & 2 deletions grid2op/Opponent/GeometricOpponent.py
@@ -5,6 +5,7 @@
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import warnings
import numpy as np

@@ -224,7 +225,6 @@ def attack(self, observation, agent_action, env_action,
duration: ``int``
The duration of the attack (if ``None`` then the attack will be made for the longest allowed time)
"""

# During creation of the environment, do not attack
if observation is None:
return None, None
@@ -233,6 +233,11 @@
if self._attack_counter >= self._number_of_attacks:
return None, None

if previous_fails:
# the previous attack failed, so the next attack time must be recomputed (self._attack_counter >= 1 here)
self._next_attack_time = self._attack_waiting_times[self._attack_counter] + \
self._attack_durations[self._attack_counter - 1]

# Set the time of the next attack
if self._next_attack_time is None:
self._next_attack_time = 1 + self._attack_waiting_times[self._attack_counter]
@@ -279,7 +284,6 @@
raw_probabilities = np.exp(b_beta * rho_ranks)
b_probabilities = raw_probabilities / raw_probabilities.sum()
attack = self.space_prng.choice(available_attacks, p=b_probabilities)

return attack, attack_duration

def get_state(self):
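The line selection near the end of `attack` above is a softmax over the rho-based ranks of the attackable lines. A standalone sketch of that weighting, with hypothetical values (only the variable names come from the class):

import numpy as np

space_prng = np.random.RandomState(0)
available_attacks = np.array(["attack_line_0", "attack_line_3", "attack_line_7"])  # hypothetical
rho_ranks = np.array([0, 2, 1])  # higher rank = more loaded line
b_beta = 2.0                     # sharpness of the softmax (hypothetical value)

raw_probabilities = np.exp(b_beta * rho_ranks)
b_probabilities = raw_probabilities / raw_probabilities.sum()
# the most loaded lines are the most likely targets
attack = space_prng.choice(available_attacks, p=b_probabilities)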
14 changes: 11 additions & 3 deletions grid2op/Opponent/OpponentSpace.py
@@ -93,6 +93,7 @@ def reset(self):
self.current_attack_cooldown = self.attack_cooldown
self.last_attack = None
self.opponent.reset(self.budget)
self.previous_fails = False

def _get_state(self):
# used for simulate
@@ -172,36 +173,43 @@ def attack(self, observation, agent_action, env_action):

# If the opponent can attack
else:
self.previous_fails = False
attack_called = True
attack, duration = self.opponent.attack(observation, agent_action, env_action, self.budget,
attack, duration = self.opponent.attack(observation,
agent_action,
env_action,
self.budget,
self.previous_fails)
if duration is None:
if np.isfinite(self.attack_max_duration):
duration = self.attack_max_duration
else:
duration = 1

self.previous_fails = False

if duration > self.attack_max_duration:
# duration chosen by the opponent would exceed the maximum duration allowed
attack = None
self.previous_fails = True

# If the cost is too high
final_budget = self.budget # TODO add the: + self.budget_per_timestep * (self.attack_duration - 1)

# I did not do it in case an attack is ok at the beginning, ok at the end, but at some point in the attack
# process it is not (but I'm not sure this can happen, and don't have time to think about it right now)
if duration * self.compute_budget(attack) > final_budget:
attack = None
self.previous_fails = True

# If we can afford the attack
elif attack is not None:
if attack is not None:
# even if it's "do nothing", it's still an attack. Too bad if the opponent chose to do nothing.
self.current_attack_duration = duration
self.current_attack_cooldown += self.attack_cooldown

if not attack_called:
self.opponent.tell_attack_continues(observation, agent_action, env_action, self.budget)
self.previous_fails = False

self.budget -= self.compute_budget(attack)
self.last_attack = attack
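The budget check above is plain arithmetic: an attack of duration `d` whose per-step cost is `c` fails (and sets `previous_fails`) whenever `d * c` exceeds the remaining budget. A hypothetical worked example:

budget = 500.0        # remaining opponent budget
cost_per_step = 30.0  # what compute_budget(attack) would return (hypothetical)
duration = 20

previous_fails = duration * cost_per_step > budget  # 600 > 500, so the attack fails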
7 changes: 6 additions & 1 deletion grid2op/Parameters.py
@@ -318,7 +318,12 @@ def init_from_json(self, json_path):
warn_msg = "Could not load from {}\n" \
"Continuing with default parameters. \n\nThe error was \"{}\""
warnings.warn(warn_msg.format(json_path, exc_))


def __eq__(self, other):
this_dict = self.to_dict()
other_dict = other.to_dict()
return this_dict == other_dict

@staticmethod
def from_json(json_path):
"""
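A quick sketch of the new `__eq__`, which compares the dictionary serializations of the two objects:

from grid2op.Parameters import Parameters

p1 = Parameters()
p2 = Parameters()
assert p1 == p2  # identical defaults give identical to_dict()
p2.NO_OVERFLOW_DISCONNECTION = True
assert p1 != p2  # Python derives != from the new __eq__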
6 changes: 3 additions & 3 deletions grid2op/Reward/AlarmReward.py
@@ -70,8 +70,8 @@ def initialize(self, env):

def reset(self, env):
self.total_time_steps = env.max_episode_duration()
self.window_size = env.parameters.ALARM_BEST_TIME
self.best_time = env.parameters.ALARM_WINDOW_SIZE
self.best_time = env.parameters.ALARM_BEST_TIME
self.window_size = env.parameters.ALARM_WINDOW_SIZE

def _tmp_score_time(self, step_alarm, step_game_over):
"""
Expand Down Expand Up @@ -151,7 +151,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
disc_lines = env._disc_lines

# so now I can consider the alarms.
best_score, is_alarm_used = -1, False
best_score, is_alarm_used = self.reward_min, False
for alarm in successfull_alarms:
tmp_sc, tmp_is = self._points_for_alarm(*alarm,
step_game_over=step_game_over,
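The swap fixed above matters because the two parameters are not interchangeable: `ALARM_BEST_TIME` is the ideal delay between the alarm and the failure, while `ALARM_WINDOW_SIZE` is the tolerance around it (my reading of the parameter names). A small sketch reading them back, assuming an alarm-enabled environment:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
# after the fix, AlarmReward.reset stores each parameter in the right attribute
print(env.parameters.ALARM_BEST_TIME, env.parameters.ALARM_WINDOW_SIZE)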
33 changes: 33 additions & 0 deletions grid2op/tests/test_Opponent.py
@@ -1492,6 +1492,39 @@ def test_simulate(self):
assert obs.rho[line_id] > 0.
assert obs.line_status[line_id]

def test_last_attack(self):
init_budget = 500
param = Parameters()
param.NO_OVERFLOW_DISCONNECTION = True
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with make("l2rpn_case14_sandbox",
test=True,
opponent_init_budget=init_budget,
opponent_budget_per_ts=200.,
opponent_attack_cooldown=0, # only for testing
opponent_attack_duration=30, # max
opponent_action_class=TopologyAction,
opponent_budget_class=BaseActionBudget,
opponent_class=GeometricOpponent,
param=param,
kwargs_opponent={"lines_attacked": LINES_ATTACKED}) as env:
env.seed(0)
_ = env.reset()
# opponent = env._opponent
# opponent._attack_durations : should be [31, 32, 31, 25]
# opponent._attack_times : should be [64, 407, 487, 522]
dn = env.action_space()
for ts in range(522):
# here the opponent cannot attack: the durations it draws exceed `opponent_attack_duration`
# (the maximum allowed), so each attempted attack fails
obs, reward, done, info = env.step(dn)
assert info["opponent_attack_line"] is None

# opponent should attack at this exact step
obs, reward, done, info = env.step(dn)
assert info["opponent_attack_line"] is not None


if __name__ == "__main__":
unittest.main()