Merge pull request rail-berkeley#1 from rpinsler/fix_dqn

Fixes DQN.
KunBB · May 2, 2018 · e9ea00a · e9ea00a
2 parents 8c2ee5d + f041fc7
commit e9ea00a
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 4 deletions.
diff --git a/rlkit/data_management/env_replay_buffer.py b/rlkit/data_management/env_replay_buffer.py
@@ -1,3 +1,4 @@
+import numpy as np
 from rlkit.data_management.simple_replay_buffer import SimpleReplayBuffer
 from gym.spaces import Box, Discrete, Tuple
 
@@ -21,6 +22,15 @@ def __init__(
             action_dim=get_dim(self._action_space),
         )
 
+    def add_sample(self, observation, action, reward, terminal,
+            next_observation, **kwargs):
+        """Forward one transition to the parent buffer, one-hot encoding `action` first if the action space is Discrete."""
+        if isinstance(self._action_space, Discrete):
+            action = np.eye(self._action_space.n)[action]  # row `action` of the n x n identity, i.e. a one-hot vector; assumes `action` is an integer index in [0, n) -- TODO confirm with callers
+        super(EnvReplayBuffer, self).add_sample(  # storage itself is delegated to the parent class
+                observation, action, reward, terminal, 
+                next_observation, **kwargs)
+
 
 def get_dim(space):
     if isinstance(space, Box):

diff --git a/rlkit/exploration_strategies/epsilon_greedy.py b/rlkit/exploration_strategies/epsilon_greedy.py
@@ -17,10 +17,6 @@ def __init__(self, action_space, prob_random_action=0.1):
         self.prob_random_action = prob_random_action
         self.action_space = action_space
 
-    def get_action(self, t, observation, policy, **kwargs):
-        action, agent_info = policy.get_action(observation)
-        return self.get_action_from_raw_action(action, **kwargs), agent_info
-
     def get_action_from_raw_action(self, action, **kwargs):
         if random.random() <= self.prob_random_action:
             return self.action_space.sample()