Merge pull request Grid2op#230 from BDonnot/bd_dev
Some more fixes for the alarm feature
BDonnot authored Jun 18, 2021
2 parents e19cb75 + 318ef31 commit 6de94f2
Showing 15 changed files with 174 additions and 29 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -313,6 +313,9 @@ issue_208_res/
test_issue_208.py
test_issue_220.py
test_issue_*.py
res_alert/
env_debug_time_last_alarm_inconsistency.zip
env_debug_time_last_alarm_inconsistency/

# profiling files
**.prof
9 changes: 9 additions & 0 deletions CHANGELOG.rst
@@ -33,6 +33,9 @@ Change Log
- [BREAKING] the "Runner.py" file has been renamed, following pep convention "runner.py". You should rename your
import `from grid2op.Runner.Runner import Runner` to `from grid2op.Runner.runner import Runner`
(**NB** we highly recommend importing the `Runner` like `from grid2op.Runner import Runner` though!)
- [FIXED]: the L2RPN_2020 score has been updated to reflect the score used during these competitions (there was an
error between `DoNothingAgent` and `RecoPowerlineAgent`)
[see `Issue#228 <https://github.com/rte-france/Grid2Op/issues/228>`_ ]
- [FIXED]: some bugs in the `action_space.get_all_unitary_redispatch` and `action_space.get_all_unitary_curtail`
- [FIXED]: some bugs in the `GreedyAgent` and `TopologyGreedy`
- [FIXED]: `Issue#220 <https://github.com/rte-france/Grid2Op/issues/220>`_ `flow_bus_matrix` did not take into
@@ -41,6 +44,12 @@ Change Log
if there is nothing controllable in grid2op present in it.
- [FIXED]: an issue where the parameters would not be completely saved when saved in json format (alarm feature was
absent) (related to `Issue#224 <https://github.com/rte-france/Grid2Op/issues/224>`_ )
- [FIXED]: an error caused by the observation not being copied when a game over occurred, which caused issues in
some cases (related to `Issue#226 <https://github.com/rte-france/Grid2Op/issues/226>`_ )
- [FIXED]: a bug in the opponent space where the `previous_fails` kwarg was not updated properly and was sent
incorrectly to the opponent
- [FIXED]: a bug in the geometric opponent when it performed an attack that failed.
- [FIXED]: `Issue#229 <https://github.com/rte-france/Grid2Op/issues/229>`_ a typo in the `AlarmReward` class when it is reset.
- [ADDED] support for the "alarm operator" / "attention budget" feature
- [ADDED] retrieval of the `max_step` (i.e. the maximum number of steps that can be performed in the current episode)
in the observation
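For the two [ADDED] entries above, a minimal sketch of how they surface to users, assuming a test environment and the attribute name `max_step` from the changelog:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
obs = env.reset()
# maximum number of steps that can be performed in the current episode
print(obs.max_step)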
6 changes: 3 additions & 3 deletions docs/environment.rst
@@ -564,7 +564,7 @@ You can then use, in the above case:
import grid2op
env_name = "l2rpn_case14_sandbox" # matching above
env_train = grid2op.make(env_name+"_train") to only use the "training chronics"
env_train = grid2op.make(env_name+"_train") # to only use the "training chronics"
# do whatever you want with env_train
And then, at time of validation:
@@ -574,8 +574,8 @@
import grid2op
env_name = "l2rpn_case14_sandbox" # matching above
env_train = grid2op.make(env_name+"_val") to only use the "training chronics"
# do whatever you want with env_train
env_val = grid2op.make(env_name+"_val") # to only use the "validation chronics"
# do whatever you want with env_val
As of now, grid2op does not support "from the API" the possibility to split with convenient
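For context, the two corrected snippets above belong to a split-then-validate workflow; a sketch of the whole round trip, assuming the `train_val_split_random` helper available in recent grid2op versions:

import grid2op

env_name = "l2rpn_case14_sandbox"
env = grid2op.make(env_name)
# writes env_name+"_train" and env_name+"_val" to disk, keeping ~10% of the chronics for validation
nm_env_train, nm_env_val = env.train_val_split_random(pct_val=10.)

env_train = grid2op.make(nm_env_train)  # training chronics only
env_val = grid2op.make(nm_env_val)      # validation chronics only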
2 changes: 1 addition & 1 deletion docs/modeled_elements.rst
@@ -1,6 +1,6 @@
.. |5subs_grid_layout| image:: ./img/5subs_grid_layout.jpg
.. |5subs_grid_5_sub1_graph| image:: ./img/5subs_grid_5_sub1_graph.jpg
.. |5subs_grid_all_1| image:: ./img/5subs_grid_all_1jpg
.. |5subs_grid_all_1| image:: ./img/5subs_grid_all_1.jpg
.. |5subs_grid_5_sub1_2_graph| image:: ./img/5subs_grid_5_sub1_2_graph.jpg

.. _modeled-elements-module:
13 changes: 11 additions & 2 deletions grid2op/Environment/BaseEnv.py
@@ -1432,7 +1432,7 @@ def _handle_updown_times(self, gen_up_before, redisp_act):
self._gen_downtime[gen_still_disconnected] += 1
return except_

def get_obs(self):
def get_obs(self, _update_state=True):
"""
Return the observation of the current environment, as seen by the :class:`grid2op.BaseAgent.BaseAgent`.
@@ -1461,7 +1461,7 @@ def get_obs(self):
# obs2 and obs are identical.
"""
res = self._observation_space(env=self)
res = self._observation_space(env=self, _update_state=_update_state)
return res

def get_thermal_limit(self):
@@ -1662,6 +1662,14 @@ def step(self, action):
action = agent.act(obs, reward, done)
obs, reward, done, info = env.step(action)
Notes
-----
If the flag `done=True` is raised (*i.e.* this is the end of the episode), then the observation is NOT properly
updated and should not be used at all.
Instead, it will be in a "game over" state (see :class:`grid2op.Observation.BaseObservation.set_game_over`).
"""

if not self.__is_init:
@@ -1938,6 +1946,7 @@ def step(self, action):
# forward to the observation if an alarm is used or not
if hasattr(self._reward_helper.template_reward, "has_alarm_component"):
self._is_alarm_used_in_reward = self._reward_helper.template_reward.is_alarm_used
self.current_obs = self.get_obs(_update_state=False)
# update the observation so when it's plotted everything is "shutdown"
self.current_obs.set_game_over(self)

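The Notes added to `step` above boil down to: never act on the observation returned together with `done=True`. A minimal sketch of the pattern, assuming a standard test environment:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
obs = env.reset()
done = False
while not done:
    action = env.action_space()  # "do nothing" action
    obs, reward, done, info = env.step(action)
# here obs is in a "game over" state (set_game_over was called on it):
# it can be plotted or logged, but it is not a valid state to act upon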
9 changes: 5 additions & 4 deletions grid2op/Observation/ObservationSpace.py
@@ -180,16 +180,17 @@ def reset_space(self):
self.obs_env.reset_space()
self.action_helper_env.actionClass.reset_space()

def __call__(self, env):
def __call__(self, env, _update_state=True):
if self.with_forecast:
self.obs_env.update_grid(env)

res = self.observationClass(obs_env=self.obs_env,
action_helper=self.action_helper_env)

# TODO how to make sure that whatever the number of time i call "simulate" i still get the same observations
# TODO use self.obs_prng when updating actions
res.update(env=env, with_forecast=self.with_forecast)
if _update_state:
# TODO how to make sure that whatever the number of times I call "simulate" I still get the same observations
# TODO use self.obs_prng when updating actions
res.update(env=env, with_forecast=self.with_forecast)
return res

def size_obs(self):
2 changes: 1 addition & 1 deletion grid2op/Observation/_ObsEnv.py
@@ -444,7 +444,7 @@ def simulate(self, action):
obs, reward, done, info = self.step(action)
return obs, reward, done, info

def get_obs(self):
def get_obs(self, _update_state=False):
"""
INTERNAL
8 changes: 6 additions & 2 deletions grid2op/Opponent/GeometricOpponent.py
@@ -5,6 +5,7 @@
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import warnings
import numpy as np

@@ -224,7 +225,6 @@ def attack(self, observation, agent_action, env_action,
duration: ``int``
The duration of the attack (if ``None`` then the attack will be made for the longest allowed time)
"""

# During creation of the environment, do not attack
if observation is None:
return None, None
@@ -233,6 +233,11 @@
if self._attack_counter >= self._number_of_attacks:
return None, None

if previous_fails:
# the previous attack failed, so the next attack time must be recomputed (self._attack_counter >= 1 here)
self._next_attack_time = self._attack_waiting_times[self._attack_counter] + \
self._attack_durations[self._attack_counter - 1]

# Set the time of the next attack
if self._next_attack_time is None:
self._next_attack_time = 1 + self._attack_waiting_times[self._attack_counter]
@@ -279,7 +284,6 @@
raw_probabilities = np.exp(b_beta * rho_ranks)
b_probabilities = raw_probabilities / raw_probabilities.sum()
attack = self.space_prng.choice(available_attacks, p=b_probabilities)

return attack, attack_duration

def get_state(self):
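The line selection near the end of `attack` above is a softmax over the rho-based ranks of the attackable lines. A standalone sketch of that weighting, with hypothetical values (only the variable names come from the class):

import numpy as np

space_prng = np.random.RandomState(0)
available_attacks = np.array(["attack_line_0", "attack_line_3", "attack_line_7"])  # hypothetical
rho_ranks = np.array([0, 2, 1])  # higher rank = more loaded line
b_beta = 2.0                     # sharpness of the softmax (hypothetical value)

raw_probabilities = np.exp(b_beta * rho_ranks)
b_probabilities = raw_probabilities / raw_probabilities.sum()
# the most loaded lines are the most likely targets
attack = space_prng.choice(available_attacks, p=b_probabilities)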
14 changes: 11 additions & 3 deletions grid2op/Opponent/OpponentSpace.py
@@ -93,6 +93,7 @@ def reset(self):
self.current_attack_cooldown = self.attack_cooldown
self.last_attack = None
self.opponent.reset(self.budget)
self.previous_fails = False

def _get_state(self):
# used for simulate
@@ -172,36 +173,43 @@ def attack(self, observation, agent_action, env_action):

# If the opponent can attack
else:
self.previous_fails = False
attack_called = True
attack, duration = self.opponent.attack(observation, agent_action, env_action, self.budget,
attack, duration = self.opponent.attack(observation,
agent_action,
env_action,
self.budget,
self.previous_fails)
if duration is None:
if np.isfinite(self.attack_max_duration):
duration = self.attack_max_duration
else:
duration = 1

self.previous_fails = False

if duration > self.attack_max_duration:
# duration chosen by the opponent would exceed the maximum duration allowed
attack = None
self.previous_fails = True

# If the cost is too high
final_budget = self.budget # TODO add the: + self.budget_per_timestep * (self.attack_duration - 1)

# I did not do it in case an attack is ok at the beginning, ok at the end, but at some point in the attack
# process it is not (but I'm not sure this can happen, and don't have time to think about it right now)
if duration * self.compute_budget(attack) > final_budget:
attack = None
self.previous_fails = True

# If we can afford the attack
elif attack is not None:
if attack is not None:
# even if it's "do nothing", it's still an attack. Too bad if the opponent chose to do nothing.
self.current_attack_duration = duration
self.current_attack_cooldown += self.attack_cooldown

if not attack_called:
self.opponent.tell_attack_continues(observation, agent_action, env_action, self.budget)
self.previous_fails = False

self.budget -= self.compute_budget(attack)
self.last_attack = attack
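The budget check above is plain arithmetic: an attack of duration `d` whose per-step cost is `c` fails (and sets `previous_fails`) whenever `d * c` exceeds the remaining budget. A hypothetical worked example:

budget = 500.0        # remaining opponent budget
cost_per_step = 30.0  # what compute_budget(attack) would return (hypothetical)
duration = 20

previous_fails = duration * cost_per_step > budget  # 600 > 500, so the attack fails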
7 changes: 6 additions & 1 deletion grid2op/Parameters.py
@@ -318,7 +318,12 @@ def init_from_json(self, json_path):
warn_msg = "Could not load from {}\n" \
"Continuing with default parameters. \n\nThe error was \"{}\""
warnings.warn(warn_msg.format(json_path, exc_))


def __eq__(self, other):
this_dict = self.to_dict()
other_dict = other.to_dict()
return this_dict == other_dict

@staticmethod
def from_json(json_path):
"""
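A quick sketch of the new `__eq__`, which compares the dictionary serializations of the two objects:

from grid2op.Parameters import Parameters

p1 = Parameters()
p2 = Parameters()
assert p1 == p2  # identical defaults give identical to_dict()
p2.NO_OVERFLOW_DISCONNECTION = True
assert p1 != p2  # Python derives != from the new __eq__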
6 changes: 3 additions & 3 deletions grid2op/Reward/AlarmReward.py
@@ -70,8 +70,8 @@ def initialize(self, env):

def reset(self, env):
self.total_time_steps = env.max_episode_duration()
self.window_size = env.parameters.ALARM_BEST_TIME
self.best_time = env.parameters.ALARM_WINDOW_SIZE
self.best_time = env.parameters.ALARM_BEST_TIME
self.window_size = env.parameters.ALARM_WINDOW_SIZE

def _tmp_score_time(self, step_alarm, step_game_over):
"""
Expand Down Expand Up @@ -151,7 +151,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
disc_lines = env._disc_lines

# so now I can consider the alarms.
best_score, is_alarm_used = -1, False
best_score, is_alarm_used = self.reward_min, False
for alarm in successfull_alarms:
tmp_sc, tmp_is = self._points_for_alarm(*alarm,
step_game_over=step_game_over,
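The swap fixed above matters because the two parameters are not interchangeable: `ALARM_BEST_TIME` is the ideal delay between the alarm and the failure, while `ALARM_WINDOW_SIZE` is the tolerance around it (my reading of the parameter names). A small sketch reading them back, assuming an alarm-enabled environment:

import grid2op

env = grid2op.make("l2rpn_case14_sandbox", test=True)
# after the fix, AlarmReward.reset stores each parameter in the right attribute
print(env.parameters.ALARM_BEST_TIME, env.parameters.ALARM_WINDOW_SIZE)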
33 changes: 33 additions & 0 deletions grid2op/tests/test_Opponent.py
@@ -1492,6 +1492,39 @@ def test_simulate(self):
assert obs.rho[line_id] > 0.
assert obs.line_status[line_id]

def test_last_attack(self):
init_budget = 500
param = Parameters()
param.NO_OVERFLOW_DISCONNECTION = True
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
with make("l2rpn_case14_sandbox",
test=True,
opponent_init_budget=init_budget,
opponent_budget_per_ts=200.,
opponent_attack_cooldown=0, # only for testing
opponent_attack_duration=30, # max
opponent_action_class=TopologyAction,
opponent_budget_class=BaseActionBudget,
opponent_class=GeometricOpponent,
param=param,
kwargs_opponent={"lines_attacked": LINES_ATTACKED}) as env:
env.seed(0)
_ = env.reset()
# opponent = env._opponent
# opponent._attack_durations : should be [31, 32, 31, 25]
# opponent._attack_times : should be [64, 407, 487, 522]
dn = env.action_space()
for ts in range(522):
# here the opponent cannot attack: the durations it draws exceed `opponent_attack_duration`
# (the maximum allowed), so each attempted attack fails
obs, reward, done, info = env.step(dn)
assert info["opponent_attack_line"] is None

# opponent should attack at this exact step
obs, reward, done, info = env.step(dn)
assert info["opponent_attack_line"] is not None


if __name__ == "__main__":
unittest.main()