Commit

more ewma fixes
justheuristic committed Jul 9, 2018
1 parent e26230f commit 6a155aa
Showing 2 changed files with 76 additions and 29 deletions.
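
The substantive change in both notebooks is the same: calls to the deprecated pandas.ewma function are replaced with a small moving_average helper built on the DataFrame.ewm accessor, keeping the same keyword arguments (alpha=... in week5, span=... in week6). A minimal sketch of the substitution follows; the toy rewards_history list is illustrative only and does not come from the notebooks.

import numpy as np
from pandas import DataFrame

# Helper introduced by this commit: an exponentially weighted moving average
# computed through DataFrame.ewm instead of the deprecated pandas.ewma call.
moving_average = lambda x, **kw: DataFrame({'x': np.asarray(x)}).x.ewm(**kw).mean().values

# Illustrative usage mirroring the notebooks' plotting calls:
rewards_history = [0, 1, 0, 1, 1, 1, 0, 1]
smoothed_alpha = moving_average(rewards_history, alpha=0.1)  # week5 cells use alpha=0.1
smoothed_span = moving_average(rewards_history, span=10)     # week6 cells use span=10 and span=1000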
97 changes: 71 additions & 26 deletions week5_explore/week5.ipynb
@@ -3,7 +3,9 @@
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from abc import ABCMeta, abstractmethod, abstractproperty\n",
@@ -61,7 +63,9 @@
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class BernoulliBandit:\n",
@@ -95,7 +99,9 @@
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class AbstractAgent(metaclass=ABCMeta): \n",
@@ -160,7 +166,9 @@
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class EpsilonGreedyAgent(AbstractAgent):\n",
@@ -202,7 +210,9 @@
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class UCBAgent(AbstractAgent):\n",
@@ -240,7 +250,9 @@
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class ThompsonSamplingAgent(AbstractAgent):\n",
@@ -251,7 +263,9 @@
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_regret(env, agents, n_steps=5000, n_trials=50):\n",
@@ -339,7 +353,9 @@
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class DriftingBandit(BernoulliBandit):\n",
@@ -431,7 +447,9 @@
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# YOUR AGENT HERE SECTION"
@@ -519,7 +537,9 @@
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import theano, theano.tensor as T\n",
@@ -544,7 +564,9 @@
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class BNNAgent:\n",
@@ -650,7 +672,9 @@
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"N_ITERS = 100"
@@ -659,7 +683,9 @@
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_new_samples(states, action_rewards, batch_size=10):\n",
@@ -671,7 +697,9 @@
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from IPython.display import clear_output\n",
@@ -695,7 +723,7 @@
" clear_output(True)\n",
" print(\"iteration #%i\\tmean reward=%.3f\\tmse=%.3f\\tkl=%.3f\"%(i,np.mean(rewards_history[-10:]),mse,kl))\n",
" plt.plot(rewards_history)\n",
" plt.plot(pandas.ewma(np.array(rewards_history),alpha=0.1))\n",
" plt.plot(moving_average(np.array(rewards_history),alpha=0.1))\n",
" plt.title(\"Reward per epesode\")\n",
" plt.xlabel(\"Episode\")\n",
" plt.ylabel(\"Reward\")\n",
@@ -710,7 +738,7 @@
" plt.title(\"p(Q(s, a))\")\n",
" plt.show()\n",
" \n",
" return pandas.ewma(np.array(rewards_history),alpha=0.1)"
" return moving_average(np.array(rewards_history),alpha=0.1)"
]
},
{
@@ -778,7 +806,9 @@
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class ThompsonBNNAgent(BNNAgent):\n",
@@ -847,7 +877,9 @@
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class BayesUCBBNNAgent(BNNAgent):\n",
@@ -976,7 +1008,9 @@
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class RiverSwimEnv:\n",
@@ -1059,7 +1093,9 @@
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class QLearningAgent:\n",
@@ -1094,7 +1130,9 @@
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def train_mdp_agent(agent, env, n_episodes):\n",
@@ -1148,7 +1186,7 @@
"rews = train_mdp_agent(agent, env, 1000)\n",
"plt.figure(figsize=(15, 8))\n",
"\n",
"plt.plot(pandas.ewma(np.array(rews),alpha=.1))\n",
"plt.plot(moving_average(np.array(rews),alpha=.1))\n",
"plt.xlabel(\"Episode count\")\n",
"plt.ylabel(\"Reward\")\n",
"plt.show()"
@@ -1164,7 +1202,9 @@
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_policy(agent):\n",
@@ -1246,7 +1286,9 @@
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def sample_normal_gamma(mu, lmbd, alpha, beta):\n",
@@ -1333,13 +1375,16 @@
}
],
"source": [
"from pandas import DataFrame\n",
"moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values\n",
"\n",
"horizon = 20\n",
"env = RiverSwimEnv(max_steps=horizon)\n",
"agent = PsrlAgent(env.n_states, env.n_actions, horizon=horizon)\n",
"rews = train_mdp_agent(agent, env, 1000)\n",
"\n",
"plt.figure(figsize=(15, 8))\n",
"plt.plot(pandas.ewma(np.array(rews), alpha=0.1))\n",
"plt.plot(moving_average(np.array(rews), alpha=0.1))\n",
"\n",
"plt.xlabel(\"Episode count\")\n",
"plt.ylabel(\"Reward\")\n",
@@ -1404,7 +1449,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.2"
}
},
"nbformat": 4,
8 changes: 5 additions & 3 deletions week6_policy_based/homework_tensorflow.ipynb
@@ -468,7 +468,9 @@
"source": [
"from IPython.display import clear_output\n",
"from tqdm import trange\n",
"from pandas import ewma\n",
"from pandas import DataFrame\n",
"moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values\n",
"\n",
"env_batch = EnvBatch(10)\n",
"batch_states = env_batch.reset()\n",
"\n",
@@ -511,12 +513,12 @@
" plt.figure(figsize=[8,4])\n",
" plt.subplot(1,2,1)\n",
" plt.plot(rewards_history, label='rewards')\n",
" plt.plot(ewma(np.array(rewards_history),span=10), marker='.', label='rewards ewma@10')\n",
" plt.plot(moving_aewmaverage(np.array(rewards_history),span=10), marker='.', label='rewards ewma@10')\n",
" plt.title(\"Session rewards\"); plt.grid(); plt.legend()\n",
" \n",
" plt.subplot(1,2,2)\n",
" plt.plot(entropy_history, label='entropy')\n",
" plt.plot(ewma(np.array(entropy_history),span=1000), label='entropy ewma@1000')\n",
" plt.plot(moving_average(np.array(entropy_history),span=1000), label='entropy ewma@1000')\n",
" plt.title(\"Policy entropy\"); plt.grid(); plt.legend() \n",
" plt.show()\n",
" \n",
