
Commit 2bfbbdf

old updates

Abhijeet1990 committed Sep 9, 2022
1 parent b417bd9 commit 2bfbbdf
Showing 22 changed files with 321 additions and 31 deletions.
3 changes: 3 additions & 0 deletions .idea/.gitignore

12 changes: 12 additions & 0 deletions .idea/BayesianRL_AVC.iml

19 changes: 19 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

4 changes: 4 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Binary file added BayesianRL_DQN/Reward_varying_samples_uf_500.jpg
8 binary files not shown.
102 changes: 71 additions & 31 deletions BayesianRL_DQN/bdqn_gridenv.py
@@ -89,7 +89,18 @@ def createActionSpace():
     space = list(itertools.product(*iterables))
     return space
 
-def dqn(n_episodes=1000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995, BDQN = False, update_freq = 500):
+# Smoothing over a window
+def avg_window_smooth(values, window_size = 10):
+    smoothed = []
+    i=0
+    while i < len(values) - window_size + 1:
+        curr_window = values[i: i+ window_size]
+        avg = sum(curr_window)/ window_size
+        smoothed.append(avg)
+        i+=1
+    return smoothed
+
+def dqn(n_episodes=1000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995, BDQN = False, update_freq = 500,samples=50000):
     """Deep Q-Learning.
 
     Params
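A minimal sanity check of the sliding-window smoother introduced in the hunk above; the function body is repeated verbatim so the snippet runs standalone, and the expected output follows directly from averaging window_size consecutive values.

# Standalone check of the windowed smoothing added in this hunk
# (function repeated here so the snippet runs on its own).
def avg_window_smooth(values, window_size=10):
    smoothed = []
    i = 0
    while i < len(values) - window_size + 1:
        curr_window = values[i: i + window_size]
        smoothed.append(sum(curr_window) / window_size)
        i += 1
    return smoothed

print(avg_window_smooth([0, 10, 20, 30, 40, 50], window_size=3))
# -> [10.0, 20.0, 30.0, 40.0]  (len(values) - window_size + 1 running means)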
@@ -117,18 +128,22 @@ def dqn(n_episodes=1000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995
     state_size = 9
     action_size = 125
 
-    agent = MCMCDQNAgent(state_size=state_size, action_size=action_size, seed=0,update_freq=update_freq)
+    agent = MCMCDQNAgent(state_size=state_size, action_size=action_size, seed=0,update_freq=update_freq,samples=samples)
 
     scores = []  # list containing scores from each episode
     scores_window = deque(maxlen=100)  # last 100 scores
     eps = eps_start  # initialize epsilon
+    episodes = []
+    episodes_window = deque(maxlen=100)
     for i_episode in range(1, n_episodes + 1):
         state = env.resetGen()
         score = 0
+        counter=0
         state = env.nextSet()
         state = state.to_numpy()
         for t in range(max_t):
-            print(f"{t} time step of episode {i_episode}")
+            #print(f"{t} time step of episode {i_episode}")
+            counter+=1
             action = agent.SelectAction(state, eps)
             next_state, reward, done, _ = env.step(action)
             next_state = next_state.to_numpy()
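An aside on the schedule driving SelectAction's exploration above: eps decays multiplicatively once per episode with a floor (the update itself appears in the next hunk). A small sketch, assuming the defaults from the dqn() signature (eps_start=1.0, eps_end=0.01, eps_decay=0.995), of how long exploration takes to bottom out:

# Sketch of the epsilon-greedy decay used by dqn(), assuming the defaults
# from the signature above: eps_start=1.0, eps_end=0.01, eps_decay=0.995.
import math

eps_start, eps_end, eps_decay = 1.0, 0.01, 0.995

# Closed form: eps_start * eps_decay**n <= eps_end once epsilon hits the floor.
n = math.ceil(math.log(eps_end / eps_start) / math.log(eps_decay))
print(n)  # 919 -> epsilon stays above its floor for roughly the first 919 episodes

# The same count, mirroring the per-episode update in the training loop.
eps, episodes = eps_start, 0
while eps > eps_end:
    eps = max(eps_end, eps_decay * eps)
    episodes += 1
print(episodes)  # 919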
@@ -140,49 +155,74 @@ def dqn(n_episodes=1000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995
             score += reward
             if done:
                 break
-        print("score=", score)
+        episodes_window.append(counter)
+        episodes.append(counter)
+        #print("score=", score)
         scores_window.append(score)  # save most recent score
         scores.append(score)  # save most recent score
         eps = max(eps_end, eps_decay * eps)  # decrease epsilon
-        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
+        #print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
         if i_episode % 100 == 0:
-            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
+            print('\rEpisode {}\tAverage Score: {:.2f}\tAverage Episode: {:.2f}'.format(i_episode, np.mean(scores_window), np.mean(episodes_window)))
         if np.mean(scores_window) >= 200.0:
             print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode - 100,
                                                                                          np.mean(scores_window)))
             if BDQN:
-                torch.save(agent.Q.state_dict(), 'bdqn_checkpoint_500_1L.pth')
+                torch.save(agent.Q.state_dict(), 'bdqn_checkpoint_uf_'+str(update_freq)+'_samples_'+str(samples)+'.pth')
             else:
-                torch.save(agent.Q.state_dict(), 'dqn_checkpoint_100.pth')
+                torch.save(agent.Q.state_dict(), 'dqn_checkpoint_uf_'+str(update_freq)+'_samples_'+str(samples)+'.pth')
             break
-    return scores
-
-
-scores = dqn(BDQN=False,update_freq=500)
-
-# Smoothing over a window
-def avg_window_smooth(values, window_size = 10):
-    smoothed = []
-    i=0
-    while i < len(values) - window_size + 1:
-        curr_window = values[i: i+ window_size]
-        avg = sum(curr_window)/ window_size
-        smoothed.append(avg)
-        i+=1
-    return smoothed
-
+    return scores,episodes
+
+uf=500
+#samples=50000
+#scores,episodes = dqn(BDQN=False,update_freq=uf,samples=samples)
+
+#plt.plot(avg_window_smooth(scores,50))
+
 # plot the scores
-fig = plt.figure()
+fig = plt.figure(0)
 ax = fig.add_subplot(111)
 #plt.plot(np.arange(len(scores)), scores)
-plt.plot(np.arange(len(avg_window_smooth(scores,50))),avg_window_smooth(scores,50))
+color=['r','g','k','b','m']  # fifth color added: range(100,600,100) below yields five update_freq values
+ctr=0
+# for sample in range(25000,125000,25000):
+#     scores_bdqn,episodes_bdqn = dqn(BDQN=True,update_freq=uf,samples=sample)
+#     plt.plot(np.arange(len(avg_window_smooth(scores_bdqn, 50))), avg_window_smooth(scores_bdqn, 50), color[ctr],label='Samples='+str(sample))
+#     ctr+=1
+# plt.ylabel('Score')
+# plt.xlabel('Episode #')
+# plt.legend()
+# fig.savefig('Reward_varying_samples_uf_'+str(uf)+'.jpg', dpi=150)
+samples=100000
+for uf in range(100,600,100):
+    scores_bdqn,episodes_bdqn = dqn(BDQN=True,update_freq=uf,samples=samples)
+    plt.plot(np.arange(len(avg_window_smooth(scores_bdqn, 50))), avg_window_smooth(scores_bdqn, 50), color[ctr],label='uf='+str(uf))
+    ctr+=1
 plt.ylabel('Score')
 plt.xlabel('Episode #')
-fig.savefig('avg_reward_BDQN_update_freq_500steps_1L.jpg', dpi=150)
+plt.legend()
+fig.savefig('Reward_varying_uf_samples_'+str(samples)+'.jpg', dpi=150)
+
+
+# # plot the scores
+# fig = plt.figure(0)
+# ax = fig.add_subplot(111)
+# #plt.plot(np.arange(len(scores)), scores)
+# plt.plot(np.arange(len(avg_window_smooth(scores,50))),avg_window_smooth(scores,50),'r')
+# plt.plot(np.arange(len(avg_window_smooth(scores_bdqn,50))),avg_window_smooth(scores_bdqn,50),'g')
+# plt.ylabel('Score')
+# plt.xlabel('Episode #')
+# fig.savefig('Reward_DQN_vs_BDQN_uf_'+str(uf)+'_samples_'+str(samples)+'.jpg', dpi=150)
+#
+#
+# # plot the scores
+# fig2 = plt.figure(1)
+# ax2 = fig2.add_subplot(111)
+# #plt.plot(np.arange(len(scores)), scores)
+# plt.plot(np.arange(len(avg_window_smooth(episodes,50))),avg_window_smooth(episodes,50),'r')
+# plt.plot(np.arange(len(avg_window_smooth(episodes_bdqn,50))),avg_window_smooth(episodes_bdqn,50),'g')
+# plt.ylabel('Episode Length')
+# plt.xlabel('Episode #')
+# fig2.savefig('Episode_Len_DQN_vs_BDQN_uf_'+str(uf)+'_samples_'+str(samples)+'.jpg', dpi=150)
 
 
 plt.show()
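The MCMCDQNAgent used in this script is defined elsewhere in the repository; only its call sites are visible in this diff. A hypothetical skeleton of the interface those call sites imply — the constructor arguments, SelectAction(state, eps), and a Q network whose state_dict() is checkpointed — with invented stub bodies for illustration, not the repository's actual implementation:

# Hypothetical interface implied by the call sites in bdqn_gridenv.py;
# the real MCMCDQNAgent (and its MCMC posterior sampling) lives elsewhere
# in this repository. Bodies here are illustrative stubs only.
import random
import torch
import torch.nn as nn

class MCMCDQNAgent:
    def __init__(self, state_size, action_size, seed, update_freq, samples):
        random.seed(seed)
        self.action_size = action_size
        self.update_freq = update_freq  # update cadence in steps, swept in the loop above
        self.samples = samples          # MCMC sample budget (assumed meaning of the new parameter)
        self.Q = nn.Linear(state_size, action_size)  # stand-in network; agent.Q.state_dict() is what gets saved

    def SelectAction(self, state, eps):
        # Epsilon-greedy over Q-values, as the eps argument passed in the loop suggests.
        if random.random() < eps:
            return random.randrange(self.action_size)
        with torch.no_grad():
            return int(self.Q(torch.as_tensor(state, dtype=torch.float32)).argmax())

# e.g. agent = MCMCDQNAgent(state_size=9, action_size=125, seed=0,
#                           update_freq=500, samples=100000)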


Binary file not shown.
