@@ -52,27 +52,28 @@ def check_state_exist(self, state):
52
52
53
53
54
54
class EnvModel:
    """Tabular model of the environment built from observed transitions.

    Similar to the memory buffer in DQN, past experiences are stored here.
    Alternatively, it can be seen as a model that reproduces the next state
    and reward signal for any (state, action) pair it has already observed.
    """

    def __init__(self, actions):
        """Create an empty model.

        Args:
            actions: iterable of action labels; they become the columns of
                ``self.database``.
        """
        # The simplest model is a lookup table holding every past transition.
        self.actions = actions
        # Rows are states, columns are actions. A cell holds the tuple
        # (reward, next_state), or None while that action is still untried.
        # The builtin ``object`` replaces the removed ``np.object`` alias.
        self.database = pd.DataFrame(columns=actions, dtype=object)

    def store_transition(self, s, a, r, s_):
        """Record that taking action ``a`` in state ``s`` gave ``(r, s_)``.

        A later call with the same ``(s, a)`` overwrites the stored outcome.
        """
        if s not in self.database.index:
            # Add a row for the unseen state with every action marked untried.
            # ``.loc`` enlargement replaces the removed ``DataFrame.append``.
            self.database.loc[s] = [None] * len(self.actions)
        # ``.at`` stores the tuple as one object in the cell
        # (replacement for the removed ``DataFrame.set_value``).
        self.database.at[s, a] = (r, s_)

    def sample_s_a(self):
        """Return a random previously observed ``(state, action)`` pair.

        The state is drawn uniformly from the stored states, then the action
        uniformly from the actions already tried in that state.

        Raises:
            ValueError: from ``np.random.choice`` when nothing is stored yet.
        """
        s = np.random.choice(self.database.index)
        # dropna() filters out the None placeholders of untried actions.
        a = np.random.choice(self.database.loc[s].dropna().index)
        return s, a

    def get_r_s_(self, s, a):
        """Return the stored ``(reward, next_state)`` for ``(s, a)``.

        Raises:
            KeyError: if ``s`` or ``a`` is not a label of the table.
            TypeError: if ``a`` has not been tried in ``s`` (cell is None).
        """
        r, s_ = self.database.loc[s, a]
        return r, s_
0 commit comments