
Commit

Training test
jkulhanek committed Sep 26, 2018
1 parent aea126a commit 1f86f57
Showing 3 changed files with 43 additions and 72 deletions.
2 changes: 2 additions & 0 deletions agent/network.py
@@ -36,7 +36,9 @@ def forward(self, inp):
(x, y,) = inp

x = x.view(-1)
print(self.fc_siemense.weight)
x = self.fc_siemense(x)
print('testok')
x = F.relu(x, True)

y = y.view(-1)
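The two print calls added in this hunk are temporary debug output: the fc_siemense weights before the layer is applied and a 'testok' marker after it. For reference only, a less intrusive way to inspect a layer during the forward pass is a PyTorch forward hook; the sketch below is not part of this commit and uses a hypothetical stand-in layer with assumed sizes.

import torch
import torch.nn as nn

# Hypothetical stand-in for the fc_siemense layer; the in/out sizes are assumptions.
fc_siemense = nn.Linear(8192, 512)

def inspect(module, inputs, output):
    # Log a weight summary and the output shape instead of dumping the full tensor.
    print(f"|W| = {module.weight.norm().item():.4f}, output shape = {tuple(output.shape)}")

handle = fc_siemense.register_forward_hook(inspect)
_ = fc_siemense(torch.randn(1, 8192))  # dummy forward pass triggers the hook
handle.remove()                        # remove the hook when done debugging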
65 changes: 18 additions & 47 deletions agent/training_thread.py
@@ -27,13 +27,14 @@ def __init__(self,
scene : str,
**kwargs):

super(TrainingThread, self).__init__(name = "process_{id}")
super(TrainingThread, self).__init__()

# Initialize the environment
self.env = THORDiscreteEnvironment(scene, **kwargs)
self.device = device
self.local_backbone_network = SharedNetwork()
self.master = master
self.id = id

self.gamma : float= kwargs.get('gamma', 0.99)
self.grad_norm: float = kwargs.get('grad_norm', 40.0)
@@ -49,11 +50,6 @@ def __init__(self,
self.criterion = ActorCriticLoss(entropy_beta)
self.policy_network = nn.Sequential(self.local_backbone_network, self.scene_network)

self.master.optimizer = self.master.createOptimizer(self.policy_network.parameters())

import torch.optim as optim
optimizer = optim.RMSprop(self.policy_network.parameters(), eps=0.1, alpha=0.99, lr=0.0007001643593729748)

# Initialize the episode
self._reset_episode()
self._sync_network()
@@ -70,9 +66,6 @@ def _ensure_shared_grads(self):
def get_action_space_size(self):
return len(self.env.actions)

def start(self):
self.env.start()

def _reset_episode(self):
self.episode_reward = 0
self.episode_length = 0
@@ -98,6 +91,7 @@ def _forward_explore(self):
goal_processed = torch.from_numpy(state["goal"])

(policy, value) = self.policy_network((x_processed, goal_processed,))
print('oka')

# Store raw network output to use in backprop
results["policy"].append(policy)
@@ -112,7 +106,6 @@ def _forward_explore(self):
value = value.data.numpy()



# Makes the step in the environment
self.env.step(action)

@@ -139,7 +132,7 @@ def _forward_explore(self):
rollout_path["state"].append(state)
rollout_path["action"].append(action)
rollout_path["rewards"].append(reward)
rollout_path["done"].append(is_terminal)
rollout_path["done"].append(is_terminal)

if is_terminal:
# TODO: add logging
@@ -194,44 +187,22 @@ def _optimize_path(self, playout_reward: float, results, rollout_path):
loss = loss.sum()

loss_value = loss.detach().numpy()
print(loss_value)
self.master.optimizer.optimize(loss,
self.policy_network.parameters(),
self.master_network.parameters())

def run(self):
self.env.reset()
self._sync_network()
while True:
self._sync_network()
# Plays some samples
playout_reward, results, rollout_path = self._forward_explore()
# Train on collected samples
self._optimize_path(playout_reward, results, rollout_path)
pass

if __name__ == '__main__':
from agent.network import SharedNetwork, SceneSpecificNetwork
import sys
import pickle

model_data = pickle.load(open('D:\\models\\visual-navigation\\weights.p', 'rb'))


logger = logging.getLogger('training')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout))

thread = TrainingThread(
id = 1,
device = torch.device('cpu'),
shared_network = SharedNetwork(),
scene = 'bedroom_04',
entropy_beta = 0.2,
logger = logger,
max_t = 5,
terminal_state_id = 26,
h5_file_path = 'D:\\datasets\\visual_navigation_precomputed\\bathroom_02.h5'
)

print('Loaded')
thread.run()
print(f'Thread {self.id} started')
try:
self.env.reset()
while True:
self._sync_network()
# Plays some samples
playout_reward, results, rollout_path = self._forward_explore()
print(self.episode_length)
# Train on collected samples
self._optimize_path(playout_reward, results, rollout_path)
pass
except Exception as e:
self.logger.error(e.msg)
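The try/except loop above is the new body of run(): sync with the master network, roll out an episode segment with _forward_explore, then optimize on the collected samples. The _ensure_shared_grads helper referenced earlier in this file typically follows the common A3C idiom of copying worker gradients into the shared (master) model before the shared optimizer steps; a minimal sketch of that idiom for context, not this repository's exact implementation:

def ensure_shared_grads(local_model, shared_model):
    # Copy gradients computed on the worker's local copy into the shared model,
    # so a single shared optimizer can apply them (standard A3C pattern).
    for local_param, shared_param in zip(local_model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None:
            return  # gradients for this step were already written
        shared_param._grad = local_param.grad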
48 changes: 23 additions & 25 deletions agent/training.py → training.py
@@ -70,23 +70,6 @@ def __init__(self, device, config):
for net in self.scene_networks.values():
net.share_memory()

branches = [(scene, int(target)) for scene in TASK_LIST.keys() for target in TASK_LIST.get(scene)]

def _createThread(id, task):
(scene, target) = task
return TrainingThread(
id = id,
device = self.device,
master = self,
network = nn.Sequential(self.shared_network, self.scene_networks[scene]),
scene = scene,
logger = self.logger,
terminal_state_id = target,
**self.config)

self.createOptimizer = lambda params: torch.optim.RMSprop(params, lr = self.learning_rate, alpha = self.rmsp_alpha, eps = self.rmsp_epsilon)
self.threads = [_createThread(i, task) for i, task in enumerate(branches)]

def run(self):
print("Training started")
self.print_parameters()
@@ -102,15 +85,30 @@ def run(self):
optimizer_wrapper = TrainingOptimizer(self.grad_norm, optimizer, parameters)
self.optimizer = optimizer_wrapper

self.threads[0].run()
# threads[0].join()
# Prepare threads
branches = [(scene, int(target)) for scene in TASK_LIST.keys() for target in TASK_LIST.get(scene)]
def _createThread(id, task):
(scene, target) = task
net = nn.Sequential(self.shared_network, self.scene_networks[scene])
net.share_memory()
return TrainingThread(
id = id,
device = self.device,
master = self,
network = net,
scene = scene,
logger = self.logger,
terminal_state_id = target,
**self.config)

return
self.threads = [_createThread(i, task) for i, task in enumerate(branches)]
self.threads[0].start()
self.threads[0].join()

for thread in threads:
for thread in self.threads:
thread.start()

for thread in threads:
for thread in self.threads:
thread.join()


@@ -124,22 +122,22 @@ def print_parameters(self):
self.logger.info(f"- batch size: {self.config.get('batch_size')}")
self.logger.info(f"- gamma: {self.config.get('gamma')}")
self.logger.info(f"- learning rate: {self.config.get('learning_rate')}")


if __name__ == "__main__":
mp.set_start_method('spawn')
training = Training(torch.device('cpu'), {
'learning_rate': 7 * 10e4,
'rmsp_alpha': 0.99,
'rmsp_epsilon': 0.1,
'h5_file_path': (lambda scene: f"D:\\datasets\\visual_navigation_precomputed\\{scene}.h5")
'h5_file_path': (lambda scene: f"/mnt/d/datasets/visual_navigation_precomputed/{scene}.h5")
})

import pickle
shared_net = SharedNetwork()
scene_nets = { key:SceneSpecificNetwork(4) for key in TASK_LIST.keys() }

# Load weights trained on tensorflow
data = pickle.load(open(os.path.join(__file__, '..\\..\\weights.p'), 'rb'), encoding='latin1')
data = pickle.load(open(os.path.normpath(os.path.join(__file__, '../weights.p')), 'rb'), encoding='latin1')
def convertToStateDict(data):
return {key:torch.Tensor(v) for (key, v) in data.items()}

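The relocated setup above creates the worker threads only after the shared optimizer wrapper exists and calls share_memory() on each per-scene network before handing it to a TrainingThread. The general pattern for sharing a PyTorch model across spawned workers looks roughly like the standalone sketch below (placeholder modules and a fixed worker count, not the project's classes):

import torch.multiprocessing as mp
import torch.nn as nn

def worker(rank, shared_model):
    # Each worker keeps a local copy and periodically syncs it from the shared model.
    local_model = nn.Linear(4, 2)
    local_model.load_state_dict(shared_model.state_dict())
    # ... rollout, loss, and pushing gradients back to shared_model would go here ...

if __name__ == '__main__':
    mp.set_start_method('spawn')
    shared_model = nn.Linear(4, 2)
    shared_model.share_memory()  # put parameters in shared memory before starting workers
    processes = [mp.Process(target=worker, args=(rank, shared_model)) for rank in range(4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()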

