Skip to content

Commit

Permalink
save parameters to the right folder
Browse files Browse the repository at this point in the history
- saved weights with correct file extensions

- cleaned up save_best_reward function in ppo2

- new parameters: saved_parameters (str list), only_save_best_reward (bool)

Rewards.csv automatically adjusts its columns to match the strings listed in saved_parameters

NOTE: **saved_parameters must include "filepath" and "ep_reward_mean"**

- if the dictionary PARAMETERS is changed, one must also change the list of keys "parameter_list" in configure_csv
  • Loading branch information
sophlzy3 committed Nov 22, 2024
1 parent 7cc0ca4 commit 191f843
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 46 deletions.
3 changes: 2 additions & 1 deletion flightrl/examples/run_drone_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def main():
cliprange=0.2,
verbose=1,
n_save=5,
saved_parameters=["n_updates", "gamma","filepath","ep_reward_mean"]
saved_parameters=["ep_reward_mean", "gamma", "filepath"],
only_save_best_reward=True
)

# tensorboard
Expand Down
14 changes: 11 additions & 3 deletions flightrl/examples/successes/Rewards.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
n_updates,gamma,filepath,ep_reward_mean
5,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-20-03-50/checkpoints/2024-11-21-20-03-51_Iteration_5,-11.588208713375789
10,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-20-03-50/checkpoints/2024-11-21-20-03-51_Iteration_10,-17.444347778717873
ep_reward_mean,gamma,filepath
-11.588208713375789,,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-20-03-50/checkpoints/2024-11-21-20-03-51_Iteration_5
-17.444347778717873,,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-20-03-50/checkpoints/2024-11-21-20-03-51_Iteration_10
-17.899741551048646,,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-20-15-17/checkpoints/2024-11-21-20-15-17_Iteration_15
-13.992625453794608,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-21-48-46/checkpoints/2024-11-21-21-48-47_Iteration_15
-19.344266659280983,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-21-50-32/checkpoints/2024-11-21-21-50-33_Iteration_5
-11.30080051823199,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-21-56-25/checkpoints/2024-11-21-21-56-25_Iteration_5
-13.760591911603989,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-21-57-13/checkpoints/2024-11-21-21-57-14_Iteration_5
-14.346390655327122,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-21-58-49/checkpoints/2024-11-21-21-58-50_Iteration_5
-19.5220833185449,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-22-00-32_Iteration_15.zip
-14.576327628549189,0.99,/home/szylzz/Desktop/flightmare/flightrl/examples/saved/2024-11-21-22-01-06/checkpoints/2024-11-21-22-01-07_Iteration_10
105 changes: 63 additions & 42 deletions flightrl/rpg_baselines/ppo/ppo2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import datetime
import csv
import os
import shutil
#
import gym
import sys
Expand Down Expand Up @@ -54,7 +55,7 @@ class PPO2(ActorCriticRLModel):
:param n_cpu_tf_sess: (int) The number of threads for TensorFlow operations
If None, the number of cpu of the current machine will be used.
"""
def __init__(self, policy, env, n_save, saved_parameters, gamma=0.99, n_steps=128, ent_coef=0.01, learning_rate=2.5e-4, vf_coef=0.5,
def __init__(self, policy, env, n_save, saved_parameters, only_save_best_reward, gamma=0.99, n_steps=128, ent_coef=0.01, learning_rate=2.5e-4, vf_coef=0.5,
max_grad_norm=0.5, lam=0.95, nminibatches=4, noptepochs=4, cliprange=0.2, cliprange_vf=None,
verbose=0, tensorboard_log=None, _init_setup_model=True, policy_kwargs=None,
full_tensorboard_log=False, seed=None, n_cpu_tf_sess=None):
Expand All @@ -74,7 +75,8 @@ def __init__(self, policy, env, n_save, saved_parameters, gamma=0.99, n_steps=12
self.full_tensorboard_log = full_tensorboard_log

self.n_save = n_save
self.saved_parameters = saved_parameters
self.saved_parameters = saved_parameters # must include "filepath" and "ep_reward_mean"
self.only_save_best_reward = only_save_best_reward

self.action_ph = None
self.advs_ph = None
Expand Down Expand Up @@ -449,16 +451,16 @@ def learn(self, total_timesteps, log_dir, logger,
if callback(locals(), globals()) is False:
break

weight_path = log_dir + "/checkpoints/" +f"{date}_Iteration" + "_{}".format(update)
# save weights every n_save updates
if (self.n_save != 0) and (update % self.n_save == 0):
# save weights file
weight_path = log_dir + "/checkpoints/" +f"{date}_Iteration" + "_{}".format(update)
# save weights
self.save(weight_path)

# save training data on temporary csv
parameters = {
"date": date,
"filepath": weight_path,
"filepath": weight_path + ".zip",
"gamma": self.gamma,
"n_steps": self.n_steps,
"vf_coef": self.vf_coef,
Expand All @@ -485,13 +487,13 @@ def learn(self, total_timesteps, log_dir, logger,
with open(temp_path, mode='a', newline='') as csv_file:
csv_writer = csv.writer(csv_file)
row = [parameters[col] if col in parameters else None for col in self.saved_parameters]
print(row)
csv_writer.writerow(row)

except KeyboardInterrupt:
print("You have stopped the learning process by keyboard interrupt. Model Parameter is saved. \n")
print("\nYou have stopped the learning process by keyboard interrupt. Model Parameter is saved. \n")
# You can actually save files using the instance of self. save the model parameters.
self.save(log_dir + "_Iteration_{}".format(update))
# self.save(log_dir + "_Iteration_{}".format(update))
self.save(weight_path)
self.save_best_reward(temp_path, reward_path)
sys.exit()

Expand All @@ -514,7 +516,6 @@ def configure_csv(self, file_path, header):
with open(file_path, mode='w', newline='') as csv_file:
csv_writer = csv.writer(csv_file)
csv_writer.writerow(header) # Write the header
print(f"Header added to new or empty file: {file_path}")
return

# Ensure the directory exists
Expand Down Expand Up @@ -543,9 +544,6 @@ def configure_csv(self, file_path, header):
new_row = []
for col in header:
if col in header_index_map:
print("col: ", col)
print("header_index_map[col]: ", header_index_map[col])
print("row: ", row[header_index_map[col]])
new_row.append(row[header_index_map[col]])
else:
new_row.append(None) # Add None for missing columns
Expand All @@ -561,45 +559,68 @@ def configure_csv(self, file_path, header):

def save_best_reward(self, temp_path, reward_path):
    """Record the best checkpoint of a run in the rewards CSV.

    Reads the temporary per-run CSV at ``temp_path``, picks the row whose
    ``ep_reward_mean`` column is highest, and appends that row to
    ``reward_path`` (writing the header first when that file is empty).

    When ``self.only_save_best_reward`` is set, the weight file named in the
    row's ``filepath`` column is moved two directory levels above its
    containing folder, the containing folder is removed together with every
    other file in it, and the row's ``filepath`` is updated to the new
    location.

    The temporary CSV is removed only after the best row has been written
    (or when there is no valid row to write), so a failed write does not
    lose the run's data.

    :param temp_path: (str) path of the temporary CSV written during training
    :param reward_path: (str) path of the CSV that collects the best rows
    :raises ValueError: if ``self.saved_parameters`` lacks "ep_reward_mean"
    """
    try:
        with open(temp_path, mode='r', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            header = next(csv_reader, None)  # first row is the header
            rows = list(csv_reader)
    except FileNotFoundError:
        print(f"Temporary file not found: {temp_path}")
        return

    # Column positions follow the order of self.saved_parameters.
    if "ep_reward_mean" not in self.saved_parameters:
        raise ValueError(
            'saved_parameters must include "ep_reward_mean" to select the best reward'
        )
    reward_index = self.saved_parameters.index("ep_reward_mean")

    # Find the row with the highest reward, skipping rows whose reward
    # column is missing or not numeric.
    max_reward = float('-inf')
    max_row = None
    for row in rows:
        try:
            reward = float(row[reward_index])
        except (IndexError, ValueError):
            continue
        if reward > max_reward:
            max_reward = reward
            max_row = row

    # If configured, keep only the best weight file and update its path.
    if self.only_save_best_reward and max_row:
        try:
            filepath_index = self.saved_parameters.index("filepath")
            file_path = max_row[filepath_index]
            current_folder = os.path.dirname(file_path)
            parent_folder = os.path.dirname(os.path.dirname(current_folder))  # two levels up
            destination_path = os.path.join(parent_folder, os.path.basename(file_path))

            shutil.move(file_path, destination_path)
            max_row[filepath_index] = destination_path

            # NOTE: this removes the whole folder, including every other
            # checkpoint still stored in it, not just an empty directory.
            shutil.rmtree(current_folder, ignore_errors=True)
            print("Checkpoint folder deleted: ", current_folder)
        except (ValueError, IndexError, OSError) as e:
            # Best effort: the row is still recorded with whatever path it has.
            print(f"Error handling best reward file: {e}")

    # Write the header and best row to the final CSV file.
    if max_row:
        reward_dir = os.path.dirname(reward_path)
        if reward_dir:
            # os.makedirs('') raises, so only create a real directory component.
            os.makedirs(reward_dir, exist_ok=True)
        try:
            with open(reward_path, mode='a', newline='') as csv_file:
                csv_writer = csv.writer(csv_file)
                if os.path.getsize(reward_path) == 0 and header:
                    csv_writer.writerow(header)  # Write header if file is empty
                csv_writer.writerow(max_row)  # Write the best row
        except (OSError, csv.Error) as e:
            print(f"Error writing to reward file: {e}")
            return  # keep the temporary file so the data is not lost

    # Remove the temporary CSV file now that its content is no longer needed.
    try:
        os.remove(temp_path)
        print(f"Temporary file deleted: {temp_path}")
    except OSError as e:
        print(f"Error deleting temporary file: {e}")
    return

def save(self, save_path, cloudpickle=False):
data = {
Expand Down

0 comments on commit 191f843

Please sign in to comment.