
Commit bd02459

Commit message: edit
1 parent 05b50f6 commit bd02459


2 files changed: +25 -5 lines


Reinforcement_learning_TUT/README.md

+20
@@ -0,0 +1,20 @@
# Reinforcement Learning Methods and Tutorials

All methods mentioned below have video and text tutorials in Chinese. Visit [莫烦 Python](https://morvanzhou.github.io/tutorials/) for more.

* [Simple entry example](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/1_command_line_reinforcement_learning)
* Tabular Methods
  * [Q-learning](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/2_Q_Learning_maze)
  * [Sarsa](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/3_Sarsa_maze)
  * [Sarsa(lambda)](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/4_Sarsa_lambda_maze)
* Function Approximation (DQN)
  * [Deep Q Network](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/5_Deep_Q_Network)
  * [Using OpenAI Gym](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/6_OpenAI_gym)
* DQN-based methods
  * [Double DQN](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/5.1_Double_DQN)
  * [DQN with Prioritized Experience Replay](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/5.2_Prioritized_Replay_DQN)
  * [Dueling DQN](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/5.3_Dueling_DQN)
* [Policy Gradients](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/7_Policy_gradient_softmax)
* [Actor Critic](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/8_Actor_Critic_Advantage)
* [Deep Deterministic Policy Gradient](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/9_Deep_Deterministic_Policy_Gradient_DDPG)
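The tabular methods grouped in the list above (Q-learning, Sarsa, Sarsa(lambda)) all revolve around the same table-update idea. For orientation, here is a minimal tabular Q-learning update sketch; it is illustrative only and not code from these tutorials, and the state/action counts, `alpha`, `gamma`, and `epsilon` values are assumptions for the example.

```python
# Minimal tabular Q-learning sketch (illustrative only, not from the tutorials).
import numpy as np

n_states, n_actions = 16, 4              # assumed sizes, e.g. a small grid world
Q = np.zeros((n_states, n_actions))      # Q-table: one value per (state, action)
alpha, gamma, epsilon = 0.1, 0.9, 0.1    # assumed learning rate, discount, exploration

def choose_action(state):
    # epsilon-greedy: explore with probability epsilon, otherwise exploit
    if np.random.rand() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(Q[state]))

def q_learning_update(state, action, reward, next_state, done):
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    target = reward if done else reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (target - Q[state, action])
```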

tensorflowTUT/tf17_dropout/full_code.py

+5 -5 lines
@@ -31,7 +31,7 @@ def add_layer(inputs, in_size, out_size, layer_name, activation_function=None, )
         outputs = Wx_plus_b
     else:
         outputs = activation_function(Wx_plus_b, )
-    tf.histogram_summary(layer_name + '/outputs', outputs)
+    tf.summary.histogram(layer_name + '/outputs', outputs)
     return outputs

@@ -47,14 +47,14 @@ def add_layer(inputs, in_size, out_size, layer_name, activation_function=None, )
 # the loss between prediction and real data
 cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
                                               reduction_indices=[1]))  # loss
-tf.scalar_summary('loss', cross_entropy)
+tf.summary.scalar('loss', cross_entropy)
 train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

 sess = tf.Session()
-merged = tf.merge_all_summaries()
+merged = tf.summary.merge_all()
 # summary writer goes in here
-train_writer = tf.train.SummaryWriter("logs/train", sess.graph)
-test_writer = tf.train.SummaryWriter("logs/test", sess.graph)
+train_writer = tf.summary.FileWriter("logs/train", sess.graph)
+test_writer = tf.summary.FileWriter("logs/test", sess.graph)

 # tf.initialize_all_variables() no longer valid from
 # 2017-03-02 if using tensorflow >= 0.12
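Both hunks are the same migration: the pre-0.12 summary calls (`tf.histogram_summary`, `tf.scalar_summary`, `tf.merge_all_summaries`, `tf.train.SummaryWriter`) are replaced by their `tf.summary` equivalents. Below is a minimal sketch of the renamed API used end to end, assuming TensorFlow 1.x; the toy regression graph, placeholder shapes, and `logs/train` directory are illustrative, not taken from the tutorial file.

```python
# Minimal sketch of the tf.summary API (TensorFlow 1.x assumed); the toy
# regression graph and log directory below are illustrative only.
import numpy as np
import tensorflow as tf

xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])

W = tf.Variable(tf.random_normal([1, 1]))
b = tf.Variable(tf.zeros([1]))
prediction = tf.matmul(xs, W) + b

loss = tf.reduce_mean(tf.square(ys - prediction))
tf.summary.scalar('loss', loss)                  # was tf.scalar_summary
tf.summary.histogram('prediction', prediction)   # was tf.histogram_summary

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

sess = tf.Session()
merged = tf.summary.merge_all()                            # was tf.merge_all_summaries
writer = tf.summary.FileWriter("logs/train", sess.graph)   # was tf.train.SummaryWriter
sess.run(tf.global_variables_initializer())                # was tf.initialize_all_variables

# one illustrative training step that also records the merged summaries
x_data = np.linspace(-1, 1, 100)[:, np.newaxis]
y_data = 2 * x_data + 0.5
_, summary = sess.run([train_step, merged],
                      feed_dict={xs: x_data, ys: y_data})
writer.add_summary(summary, 0)
```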
