API documentation for some of the key RL classes.

schaul committed Nov 6, 2009
1 parent 1fb4dfb commit e2ab57b
Showing 28 changed files with 281 additions and 82 deletions.
1 change: 1 addition & 0 deletions docs/sphinx/api/optimization/optimization.txt
@@ -15,6 +15,7 @@ The two base classes

.. autoclass:: ContinuousOptimizer
:members: __init__
+ :show-inheritance:


General Black-box optimizers
16 changes: 16 additions & 0 deletions docs/sphinx/api/rl/actionvalues.txt
@@ -0,0 +1,16 @@
:mod:`actionvalues` -- RL Components: ActionValues
==================================================

.. automodule:: pybrain.rl.learners.valuebased.interface

.. autoclass:: ActionValueInterface
:members:

.. autoclass:: ActionValueTable
:members:
:show-inheritance:

.. autoclass:: ActionValueNetwork
:members:
:show-inheritance:

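These classes are the value stores behind the discrete value-based learners: ActionValueTable keeps one Q-value per state/action pair, while ActionValueNetwork approximates the values with a network. As a rough sketch of the tabular variant (the dimensions are illustrative, and the method names follow the ActionValueInterface documented above):

    from pybrain.rl.learners.valuebased import ActionValueTable

    # Tabular Q-values for a toy problem: 81 discrete states, 4 actions.
    table = ActionValueTable(81, 4)
    table.initialize(1.0)   # optimistic initial values encourage exploration

    values = table.getActionValues(0)   # Q-values of all 4 actions in state 0
    best = table.getMaxAction(0)        # index of the greedy action in state 0
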
24 changes: 24 additions & 0 deletions docs/sphinx/api/rl/agents.txt
@@ -0,0 +1,24 @@
:mod:`agents` -- RL Components: Agents
================================================

.. automodule:: pybrain.rl.agents.agent

.. autoclass:: Agent
:members:

.. automodule:: pybrain.rl.agents.logging

.. autoclass:: LoggingAgent
:members:
:show-inheritance:

.. automodule:: pybrain.rl.agents

.. autoclass:: LearningAgent
:members:
:show-inheritance:

.. autoclass:: OptimizationAgent
:members:
:show-inheritance:

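The Agent base class fixes the protocol every agent follows: receive an observation, propose an action, accept a reward. A minimal hand-rolled subclass might look like the sketch below (the RandomAgent class, its two-action space, and the random policy are invented for illustration):

    from random import choice
    from scipy import array

    from pybrain.rl.agents.agent import Agent

    class RandomAgent(Agent):
        """Hypothetical agent that ignores observations and acts randomly."""

        def integrateObservation(self, obs):
            self.lastobs = obs              # remember the latest observation

        def getAction(self):
            return array([choice([0, 1])])  # one of two made-up actions

        def giveReward(self, r):
            pass                            # nothing to learn from rewards here
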
15 changes: 15 additions & 0 deletions docs/sphinx/api/rl/experiments.txt
@@ -0,0 +1,15 @@
:mod:`experiments` -- RL Components: Experiments
================================================

.. automodule:: pybrain.rl.experiments

.. autoclass:: Experiment
:members:

.. autoclass:: EpisodicExperiment
:members:
:show-inheritance:

.. autoclass:: ContinuousExperiment
:members:
:show-inheritance:
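
An Experiment shuttles data between a task and an agent. One interaction step amounts to roughly the following (a paraphrase of what doInteractions does per step, not the actual PyBrain source):

    def one_interaction(task, agent):
        # the experiment mediates one observation/action/reward exchange
        agent.integrateObservation(task.getObservation())
        task.performAction(agent.getAction())
        agent.giveReward(task.getReward())

EpisodicExperiment repeats this loop for whole episodes until the task reports completion, and ContinuousExperiment can let the agent learn after every single interaction.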
44 changes: 44 additions & 0 deletions docs/sphinx/api/rl/explorers.txt
@@ -0,0 +1,44 @@
:mod:`explorers` -- RL Components: Explorers
================================================

.. automodule:: pybrain.rl.explorers.explorer

.. autoclass:: Explorer
:members:

Continuous Explorers
--------------------

.. automodule:: pybrain.rl.explorers.continuous

.. autoclass:: NormalExplorer
:members:

.. automodule:: pybrain.rl.explorers.continuous.sde

.. autoclass:: StateDependentExplorer
:members:

Discrete Explorers
--------------------

.. automodule:: pybrain.rl.explorers.discrete.discrete

.. autoclass:: DiscreteExplorer
:members: _setModule
:show-inheritance:

.. automodule:: pybrain.rl.explorers.discrete

.. autoclass:: EpsilonGreedyExplorer
:members: _forwardImplementation
:show-inheritance:

.. autoclass:: BoltzmannExplorer
:members: activate, _forwardImplementation
:show-inheritance:

.. autoclass:: DiscreteStateDependentExplorer
:members: activate, _forwardImplementation
:show-inheritance:

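Explorers perturb the actions proposed by the module so that the agent keeps trying alternatives. For discrete problems the usual choice is epsilon-greedy; attaching one to a value-based learner looks roughly like this (the import paths mirror the automodule directives above, and the epsilon/decay values are illustrative):

    from pybrain.rl.explorers.discrete import EpsilonGreedyExplorer
    from pybrain.rl.learners import Q

    learner = Q()
    # With probability epsilon a uniformly random action replaces the
    # greedy one; epsilon is scaled by decay after every action.
    learner.explorer = EpsilonGreedyExplorer(epsilon=0.3, decay=0.9999)
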
86 changes: 86 additions & 0 deletions docs/sphinx/api/rl/learners.txt
@@ -0,0 +1,86 @@
:mod:`learners` -- RL Components: Learners
================================================

Abstract classes
-----------------

.. automodule:: pybrain.rl.learners.learner

.. autoclass:: Learner
:members:

.. autoclass:: EpisodicLearner
:show-inheritance:

.. autoclass:: DataSetLearner
:show-inheritance:

.. autoclass:: ExploringLearner
:show-inheritance:

.. autoclass:: OntogeneticLearner
:show-inheritance:

.. automodule:: pybrain.rl.learners.directsearch.directsearch

.. autoclass:: DirectSearchLearner
:show-inheritance:

.. autoclass:: PhylogeneticLearner
:show-inheritance:


Value-based Learners
------------------------

.. automodule:: pybrain.rl.learners.valuebased.valuebased

.. autoclass:: ValueBasedLearner
:members:
:show-inheritance:

.. automodule:: pybrain.rl.learners.valuebased

.. autoclass:: Q
:members:
:show-inheritance:

.. autoclass:: QLambda
:members:
:show-inheritance:

.. autoclass:: SARSA
:members:
:show-inheritance:

.. autoclass:: NFQ
:members:
:show-inheritance:


Direct-search Learners
------------------------

.. automodule:: pybrain.rl.learners.directsearch.policygradient

.. autoclass:: PolicyGradientLearner
:members:
:show-inheritance:

.. automodule:: pybrain.rl.learners.directsearch.reinforce

.. autoclass:: Reinforce
:members:
:show-inheritance:

.. automodule:: pybrain.rl.learners.directsearch.enac

.. autoclass:: ENAC
:members:
:show-inheritance:


.. note::

Black-box optimization algorithms can also be seen as direct-search RL algorithms, but are not included here.

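Put together, the value-based classes documented here follow a standard pattern: a table (or network) holds the values, a learner such as Q or SARSA updates them, a LearningAgent owns both, and an Experiment drives the loop. The sketch below follows the PyBrain maze tutorial; the maze layout, table dimensions, and iteration counts are illustrative rather than prescribed by the API:

    from scipy import array

    from pybrain.rl.environments.mazes import Maze, MDPMazeTask
    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.learners import Q
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.experiments import Experiment

    # 9x9 maze: 1 = wall, 0 = free space; the goal sits at (7, 7).
    structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 0, 0, 1, 0, 0, 0, 0, 1],
                       [1, 0, 0, 1, 0, 0, 1, 0, 1],
                       [1, 0, 0, 1, 0, 0, 1, 0, 1],
                       [1, 0, 0, 1, 0, 1, 1, 0, 1],
                       [1, 0, 0, 0, 0, 0, 1, 0, 1],
                       [1, 1, 1, 1, 1, 1, 1, 0, 1],
                       [1, 0, 0, 0, 0, 0, 0, 0, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    environment = Maze(structure, (7, 7))
    task = MDPMazeTask(environment)

    table = ActionValueTable(81, 4)    # 81 grid cells, 4 move directions
    table.initialize(1.0)

    agent = LearningAgent(table, Q())  # tabular Q-learning
    experiment = Experiment(task, agent)

    for _ in range(50):
        experiment.doInteractions(100) # gather experience
        agent.learn()                  # update the table from the history
        agent.reset()                  # clear the history for the next batch
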
13 changes: 13 additions & 0 deletions docs/sphinx/api/rl/tasks.txt
@@ -0,0 +1,13 @@
:mod:`tasks` -- RL Components: Tasks
================================================

.. automodule:: pybrain.rl.environments.task

.. autoclass:: Task
:members:

.. automodule:: pybrain.rl.environments.episodic

.. autoclass:: EpisodicTask
:members:
:show-inheritance:
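
A Task wraps an environment, filtering observations and defining the reward; EpisodicTask additionally decides when an episode ends. A minimal subclass, sketched with an invented name and termination rule:

    from pybrain.rl.environments.episodic import EpisodicTask

    class TimedTask(EpisodicTask):
        """Hypothetical task that ends an episode after a fixed step count."""

        def __init__(self, environment, maxsteps=200):
            EpisodicTask.__init__(self, environment)
            self.maxsteps = maxsteps
            self.steps = 0

        def getReward(self):
            self.steps += 1
            return 0.0                 # placeholder reward signal

        def isFinished(self):
            return self.steps >= self.maxsteps

        def reset(self):
            EpisodicTask.reset(self)
            self.steps = 0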
1 change: 1 addition & 0 deletions docs/sphinx/index.txt
@@ -80,6 +80,7 @@ API
:glob:

api/structure/*
+ api/rl/*
api/optimization/*
api/datasets/*
api/supervised/*
10 changes: 5 additions & 5 deletions pybrain/rl/agents/agent.py
@@ -8,26 +8,26 @@ class Agent(Named):
"""

def integrateObservation(self, obs):
""" integrate the current observation of the environment.
""" Integrate the current observation of the environment.
@param obs: The last observation returned from the environment
@type obs: by default, this is assumed to be a numpy array of doubles
"""
pass

def getAction(self):
""" return a chosen action.
@rtype: by default, this is assumed to ba a numpy array of doubles.
""" Return a chosen action.
@rtype: by default, this is assumed to be a numpy array of doubles.
@note: This method is abstract and needs to be implemented.
"""
abstractMethod()

def giveReward(self, r):
""" reward or punish the agent.
""" Reward or punish the agent.
@param r: reward, if C{r} is positive, punishment if C{r} is negative
@type r: double
"""
pass

def newEpisode(self):
""" Informing the agent that a new episode has started. """
""" Inform the agent that a new episode has started. """
pass
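As the docstrings above note, getAction is the one abstract method: integrateObservation and giveReward default to no-ops, while getAction calls abstractMethod(). A quick illustration (assuming abstractMethod raises, as PyBrain's utilities do):

    from pybrain.rl.agents.agent import Agent

    a = Agent()
    a.integrateObservation([0.0])   # no-op by default
    a.giveReward(1.0)               # no-op by default
    try:
        a.getAction()               # abstract: subclasses must override it
    except Exception as err:
        print('getAction must be overridden: %s' % err)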
14 changes: 7 additions & 7 deletions pybrain/rl/agents/learning.py
@@ -28,12 +28,12 @@ def __init__(self, module, learner = None):


def _getLearning(self):
""" returns whether the agent currently learns from experience or not. """
""" Return whether the agent currently learns from experience or not. """
return self.__learning


def _setLearning(self, flag):
""" set whether or not the agent should learn from its experience """
""" Set whether or not the agent should learn from its experience """
if self.learner is not None:
self.__learning = flag
else:
@@ -43,8 +43,8 @@ def _setLearning(self, flag):


def getAction(self):
""" activates the module with the last observation, adds the exploration from
the explorer object and stores the result as last action. """
""" Activate the module with the last observation, adda the exploration from
the explorer object and store the result as last action. """
LoggingAgent.getAction(self)

self.lastaction = self.module.activate(self.lastobs)
@@ -56,7 +56,7 @@ def getAction(self):


def newEpisode(self):
""" indicates the beginning of a new episode in the training cycle. """
""" Indicate the beginning of a new episode in the training cycle. """
if self.logging:
self.history.newSequence()

@@ -65,15 +65,15 @@ def newEpisode(self):
self.learner.newEpisode()

def reset(self):
""" clears the history of the agent and resets the module and learner. """
""" Clear the history of the agent and resets the module and learner. """
LoggingAgent.reset(self)
self.module.reset()
if self.learning:
self.learner.reset()


def learn(self, episodes=1):
""" calls the learner's learn function, which has access to both module and history. """
""" Call the learner's learn method, which has access to both module and history. """
if self.learning:
self.learner.learnEpisodes(episodes)

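The _getLearning/_setLearning pair backs a learning property on LearningAgent, so training can be toggled after construction, e.g. to evaluate the greedy policy without further updates. A sketch (the module/learner combination is illustrative):

    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.learners import SARSA
    from pybrain.rl.learners.valuebased import ActionValueTable

    agent = LearningAgent(ActionValueTable(81, 4), SARSA())
    agent.learning = False   # routed through _setLearning(); needs a learner
    # ... run evaluation interactions here ...
    agent.learning = True    # resume learning from new experience
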
12 changes: 6 additions & 6 deletions pybrain/rl/agents/logging.py
@@ -29,15 +29,15 @@ def __init__(self, indim, outdim):


def integrateObservation(self, obs):
""" 1. stores the observation received in a temporary variable until action is called and
reward is given. """
"""Step 1: store the observation received in a temporary variable until action is called and
reward is given. """
self.lastobs = obs
self.lastaction = None
self.lastreward = None


def getAction(self):
""" 2. stores the action in a temporary variable until reward is given. """
"""Step 2: store the action in a temporary variable until reward is given. """
assert self.lastobs != None
assert self.lastaction == None
assert self.lastreward == None
@@ -46,7 +46,7 @@ def giveReward(self, r):


def giveReward(self, r):
""" 3. stores observation, action and reward in the history dataset. """
"""Step 3: store observation, action and reward in the history dataset. """
# step 3: assume that state and action have been set
assert self.lastobs != None
assert self.lastaction != None
@@ -63,13 +63,13 @@ def newEpisode(self):


def newEpisode(self):
""" inidicates the beginning of a new episode in the training cycle. """
""" Indicate the beginning of a new episode in the training cycle. """
if self.logging:
self.history.newSequence()


def reset(self):
""" clears the history of the agent. """
""" Clear the history of the agent. """
self.lastobs = None
self.lastaction = None
self.lastreward = None