Commit
Merge pull request Grid2op#137 from BDonnot/alarm_score
Alarm score
BDonnot authored Jun 21, 2021
2 parents 318ef31 + 2d03b63 commit 4a9acb0
Showing 4 changed files with 683 additions and 0 deletions.
3 changes: 3 additions & 0 deletions grid2op/Reward/AlarmReward.py
@@ -104,6 +104,9 @@ def _mult_for_zone(self, alarm, disc_lines, env):
        # extract the lines that have been disconnected due to cascading failures
        lines_disconnected_first = np.where(disc_lines == 0)[0]

        if np.sum(alarm) > 1:
            # if the alarm covers more than one zone, we cannot discriminate: no bonus points
            return res

        # extract the zones they belong to
        zones_these_lines = set()
        zone_for_each_lines = env.alarms_lines_area
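For context, here is a minimal standalone sketch of the guard added above. It is an illustration only: `zone_bonus`, `base_score` and `bonus_multiplier` are hypothetical names (in the reward class the multiplier is `mult_for_right_zone`), and the alarm is assumed to be a 0/1 vector with one entry per zone.

import numpy as np

def zone_bonus(alarm, base_score, bonus_multiplier=1.5):
    """Boost the score only when the alarm targets exactly one zone."""
    if np.sum(alarm) > 1:
        # more than one zone flagged: we cannot discriminate, no bonus points
        return base_score
    return base_score * bonus_multiplier

# on a 3-zone grid, only the single-zone alarm earns the bonus
print(zone_bonus(np.array([0, 1, 0]), 1.0))  # 1.5
print(zone_bonus(np.array([1, 1, 0]), 1.0))  # 1.0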
144 changes: 144 additions & 0 deletions grid2op/Reward/_AlarmScore.py
@@ -0,0 +1,144 @@
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import copy

import numpy as np

from grid2op.Exceptions import Grid2OpException
from grid2op.Reward import AlarmReward
from grid2op.dtypes import dt_float


class _AlarmScore(AlarmReward):
    """
    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

    It **must not** serve as a reward. This score needs to be minimized,
    whereas a reward needs to be maximized! Also, this "reward" is not scaled in any way. Use it at your
    own risk.

    Implemented as a reward to make it easier to use in the context of the L2RPN competitions, this "reward"
    computes the "alarm score". It should not be used to train an agent.

    The reward closest to this score is given by the :class:`AlarmReward` class.

    This reward is based on the "alarm feature", where the agent is asked to send information about potential
    issues on the grid.

    In this case, when the environment is in a "game over" state (i.e. at the end of the episode), the reward
    is computed as follows:

    - if the environment has been successfully managed until the end of the chronics, then 1.0 is returned
    - if no alarm has been raised, then -2.0 is returned
    - points for pointing to the right zones are computed based on the lines disconnected either in a short
      window before game over or, otherwise, at the time of game over

    Examples
    ---------
    You can use this reward in any environment with:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import _AlarmScore

        # then you create your environment with it:
        NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=_AlarmScore)

        # and do a step with a "do nothing" action
        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # the reward is computed with the _AlarmScore class
    """

    def __init__(self):
        AlarmReward.__init__(self)
        # required if you want to design a custom reward taking into account the
        # alarm feature
        self.reward_min = dt_float(-2.)
        # we keep the other parameter values from AlarmReward as they are

        self.mult_for_right_zone = 1.5
        self.window_disconnection = 4

        self.disc_lines_all_before_cascade = []

    def initialize(self, env):
        if not env._has_attention_budget:
            raise Grid2OpException("Impossible to use the \"_AlarmScore\" with an environment for which this feature "
                                   "is disabled. Please make sure \"env._has_attention_budget\" is set to ``True`` or "
                                   "change the reward class with `grid2op.make(..., reward_class=AnyOtherReward)`")
        self.n_line = env.n_line
        self.reset(env)

    def reset(self, env):
        super().reset(env)
        self.window_disconnection = max(self.best_time - self.window_size, 4)
        self.disc_lines_all_before_cascade = []

    def _lines_disconnected_first(self, disc_lines_at_cascading_time):
        """
        Here we detect the disconnected lines that are considered when computing the mult_for_zone
        multiplying factor: either the lines that were disconnected in a short window before the final
        failure or, failing that, the first lines disconnected at the time of failure.

        :param disc_lines_at_cascading_time: lines that are disconnected first at the time of failure
        :return: a vector in which disconnected lines are encoded as 0, consistent with env._disc_lines
        """

        disc_lines_to_consider_for_score = np.zeros(self.n_line, dtype=bool)

        nb_obs = len(self.disc_lines_all_before_cascade)

        for t in range(nb_obs - self.window_disconnection, nb_obs):
            disc_lines_to_consider_for_score[self.disc_lines_all_before_cascade[t] >= 0] = True

        if np.sum(disc_lines_to_consider_for_score) == 0:
            disc_lines_to_consider_for_score = disc_lines_at_cascading_time == 0

        # if we are here, it is because we have identified before that the failure is due to disconnected powerlines
        assert np.any(disc_lines_to_consider_for_score)

        # we transform the vector so that disconnected lines have a zero, to be consistent with env._disc_lines
        return 1 - disc_lines_to_consider_for_score

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

        disc_lines_now = env._disc_lines

        if is_done:
            if not has_error:
                # the agent made it until the end
                return self.reward_max

            if np.all(env._disc_lines == -1):
                # the game over is not caused by the tripping of a powerline
                return self.reward_min

            if len(env._attention_budget._all_successful_alarms) == 0:
                # no alarm has been sent, so it's the minimum
                return self.reward_min

            successful_alarms = env._attention_budget._all_successful_alarms
            step_game_over = env.nb_time_step

            disc_lines_to_consider_for_score = self._lines_disconnected_first(disc_lines_now)

            # so now we can consider the alarms
            best_score, is_alarm_used = self.reward_min, False
            for alarm in successful_alarms:
                tmp_sc, tmp_is = self._points_for_alarm(*alarm,
                                                        step_game_over=step_game_over,
                                                        disc_lines=disc_lines_to_consider_for_score,
                                                        env=env)
                if tmp_sc > best_score:
                    best_score = tmp_sc
                    is_alarm_used = tmp_is

            self.is_alarm_used = is_alarm_used
            return best_score
        else:
            # make sure to deepcopy, otherwise the logged vector gets updated to the last timestep
            # value for every previous timestep
            self.disc_lines_all_before_cascade.append(copy.deepcopy(disc_lines_now))  # we log the lines disconnected over time
            res = self.reward_no_game_over
            return res
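To make the window logic of `_lines_disconnected_first` concrete, here is a minimal sketch under assumed conventions: a hypothetical 5-line grid, a hand-built `history` standing in for `disc_lines_all_before_cascade`, and entries >= 0 marking a disconnection, matching the `env._disc_lines` encoding used above.

import numpy as np

# hypothetical 5-line grid observed for 6 steps; -1 = not disconnected that
# step, values >= 0 give the cascade level, as in env._disc_lines
history = [np.full(5, -1) for _ in range(6)]
history[4][2] = 0              # line 2 tripped shortly before the failure
window = 4                     # plays the role of self.window_disconnection

flagged = np.zeros(5, dtype=bool)
for t in range(len(history) - window, len(history)):
    flagged[history[t] >= 0] = True

if not flagged.any():
    # nothing tripped inside the window: fall back to the lines that failed
    # first at game over (cascade level 0)
    disc_at_failure = np.array([-1, 0, -1, -1, 1])
    flagged = disc_at_failure == 0

# encode as in env._disc_lines: disconnected lines become 0
print(1 - flagged)  # -> [1 1 0 1 1]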
2 changes: 2 additions & 0 deletions grid2op/Reward/__init__.py
@@ -17,6 +17,7 @@
"BaseReward",
"EpisodeDurationReward",
"AlarmReward",
"_AlarmScore",
# TODO it would be better to have a specific package for this, but in the mean time i put it here
"L2RPNSandBoxScore"
]
@@ -40,6 +41,7 @@
from grid2op.Reward.L2RPNSandBoxScore import L2RPNSandBoxScore
from grid2op.Reward.EpisodeDurationReward import EpisodeDurationReward
from grid2op.Reward.AlarmReward import AlarmReward
from grid2op.Reward._AlarmScore import _AlarmScore

import warnings

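With `_AlarmScore` exported as above, the score can also be tracked alongside a training reward. A sketch, assuming the `other_rewards` keyword of `grid2op.make` (the environment name and the "alarm_score" key are arbitrary choices here):

import grid2op
from grid2op.Reward import _AlarmScore

env = grid2op.make("rte_case14_realistic",
                   other_rewards={"alarm_score": _AlarmScore})
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())
# the score is reported at each step under info["rewards"]
print(info["rewards"]["alarm_score"])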
