diff --git a/week5_explore/week5.ipynb b/week5_explore/week5.ipynb index 4f9beb699..4bb327724 100644 --- a/week5_explore/week5.ipynb +++ b/week5_explore/week5.ipynb @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": 84, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from abc import ABCMeta, abstractmethod, abstractproperty\n", @@ -61,7 +63,9 @@ { "cell_type": "code", "execution_count": 88, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class BernoulliBandit:\n", @@ -95,7 +99,9 @@ { "cell_type": "code", "execution_count": 89, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class AbstractAgent(metaclass=ABCMeta): \n", @@ -160,7 +166,9 @@ { "cell_type": "code", "execution_count": 90, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class EpsilonGreedyAgent(AbstractAgent):\n", @@ -202,7 +210,9 @@ { "cell_type": "code", "execution_count": 91, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class UCBAgent(AbstractAgent):\n", @@ -240,7 +250,9 @@ { "cell_type": "code", "execution_count": 95, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class ThompsonSamplingAgent(AbstractAgent):\n", @@ -251,7 +263,9 @@ { "cell_type": "code", "execution_count": 93, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def plot_regret(env, agents, n_steps=5000, n_trials=50):\n", @@ -339,7 +353,9 @@ { "cell_type": "code", "execution_count": 97, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class DriftingBandit(BernoulliBandit):\n", @@ -431,7 +447,9 @@ { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "# YOUR AGENT HERE SECTION" @@ -519,7 +537,9 @@ { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import theano, theano.tensor as T\n", @@ -544,7 +564,9 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class BNNAgent:\n", @@ -650,7 +672,9 @@ { "cell_type": "code", "execution_count": 16, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "N_ITERS = 100" @@ -659,7 +683,9 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def get_new_samples(states, action_rewards, batch_size=10):\n", @@ -671,7 +697,9 @@ { "cell_type": "code", "execution_count": 100, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from IPython.display import clear_output\n", @@ -695,7 +723,7 @@ " clear_output(True)\n", " print(\"iteration #%i\\tmean reward=%.3f\\tmse=%.3f\\tkl=%.3f\"%(i,np.mean(rewards_history[-10:]),mse,kl))\n", " plt.plot(rewards_history)\n", - " plt.plot(pandas.ewma(np.array(rewards_history),alpha=0.1))\n", + " plt.plot(moving_average(np.array(rewards_history),alpha=0.1))\n", " plt.title(\"Reward per epesode\")\n", " plt.xlabel(\"Episode\")\n", " plt.ylabel(\"Reward\")\n", @@ -710,7 +738,7 @@ " plt.title(\"p(Q(s, a))\")\n", " plt.show()\n", " \n", - " return pandas.ewma(np.array(rewards_history),alpha=0.1)" + " return moving_average(np.array(rewards_history),alpha=0.1)" ] }, { @@ -778,7 +806,9 @@ { "cell_type": "code", 
"execution_count": 20, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class ThompsonBNNAgent(BNNAgent):\n", @@ -847,7 +877,9 @@ { "cell_type": "code", "execution_count": 22, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class BayesUCBBNNAgent(BNNAgent):\n", @@ -976,7 +1008,9 @@ { "cell_type": "code", "execution_count": 25, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class RiverSwimEnv:\n", @@ -1059,7 +1093,9 @@ { "cell_type": "code", "execution_count": 26, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "class QLearningAgent:\n", @@ -1094,7 +1130,9 @@ { "cell_type": "code", "execution_count": 27, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def train_mdp_agent(agent, env, n_episodes):\n", @@ -1148,7 +1186,7 @@ "rews = train_mdp_agent(agent, env, 1000)\n", "plt.figure(figsize=(15, 8))\n", "\n", - "plt.plot(pandas.ewma(np.array(rews),alpha=.1))\n", + "plt.plot(moving_average(np.array(rews),alpha=.1))\n", "plt.xlabel(\"Episode count\")\n", "plt.ylabel(\"Reward\")\n", "plt.show()" @@ -1164,7 +1202,9 @@ { "cell_type": "code", "execution_count": 103, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def plot_policy(agent):\n", @@ -1246,7 +1286,9 @@ { "cell_type": "code", "execution_count": 105, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "def sample_normal_gamma(mu, lmbd, alpha, beta):\n", @@ -1333,13 +1375,16 @@ } ], "source": [ + "from pandas import DataFrame\n", + "moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values\n", + "\n", "horizon = 20\n", "env = RiverSwimEnv(max_steps=horizon)\n", "agent = PsrlAgent(env.n_states, env.n_actions, horizon=horizon)\n", "rews = train_mdp_agent(agent, env, 1000)\n", "\n", "plt.figure(figsize=(15, 8))\n", - "plt.plot(pandas.ewma(np.array(rews), alpha=0.1))\n", + "plt.plot(moving_average(np.array(rews), alpha=0.1))\n", "\n", "plt.xlabel(\"Episode count\")\n", "plt.ylabel(\"Reward\")\n", @@ -1404,7 +1449,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.2" + "version": "3.6.2" } }, "nbformat": 4, diff --git a/week6_policy_based/homework_tensorflow.ipynb b/week6_policy_based/homework_tensorflow.ipynb index 775e02aca..763a4725f 100644 --- a/week6_policy_based/homework_tensorflow.ipynb +++ b/week6_policy_based/homework_tensorflow.ipynb @@ -468,7 +468,9 @@ "source": [ "from IPython.display import clear_output\n", "from tqdm import trange\n", - "from pandas import ewma\n", + "from pandas import DataFrame\n", + "moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values\n", + "\n", "env_batch = EnvBatch(10)\n", "batch_states = env_batch.reset()\n", "\n", @@ -511,12 +513,12 @@ " plt.figure(figsize=[8,4])\n", " plt.subplot(1,2,1)\n", " plt.plot(rewards_history, label='rewards')\n", - " plt.plot(ewma(np.array(rewards_history),span=10), marker='.', label='rewards ewma@10')\n", + " plt.plot(moving_aewmaverage(np.array(rewards_history),span=10), marker='.', label='rewards ewma@10')\n", " plt.title(\"Session rewards\"); plt.grid(); plt.legend()\n", " \n", " plt.subplot(1,2,2)\n", " plt.plot(entropy_history, label='entropy')\n", - " plt.plot(ewma(np.array(entropy_history),span=1000), label='entropy ewma@1000')\n", + " 
plt.plot(moving_average(np.array(entropy_history),span=1000), label='entropy ewma@1000')\n", " plt.title(\"Policy entropy\"); plt.grid(); plt.legend() \n", " plt.show()\n", " \n",
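
Note on the change repeated across both notebooks above: the deprecated pandas.ewma / "from pandas import ewma" calls are replaced by a small moving_average helper built on the pandas .ewm() accessor. The sketch below restates that helper as a standalone function and shows the two call styles used in the notebooks (alpha=... in week5, span=... in week6); the sample reward data is made up purely for illustration.

# Minimal sketch of the moving_average helper introduced in this diff; it mirrors
# the lambda in the notebooks, rewritten as a named function for readability.
import numpy as np
from pandas import DataFrame

def moving_average(x, **kw):
    # Exponentially weighted moving average via Series.ewm(); keyword
    # arguments (e.g. alpha=0.1 or span=10) are forwarded to .ewm().
    return DataFrame({'x': np.asarray(x)}).x.ewm(**kw).mean().values

# Illustrative usage with placeholder data (not taken from the assignments):
rewards_history = np.random.rand(200)
smoothed_by_alpha = moving_average(rewards_history, alpha=0.1)  # week5-style call
smoothed_by_span = moving_average(rewards_history, span=10)     # week6-style call
print(smoothed_by_alpha[:3], smoothed_by_span[:3])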