#!/usr/bin/env python
"""Record human-played MountainCar episodes and save them as RLlib offline
demonstration data (JSON SampleBatches) for later use with the offline API."""
import time

import numpy as np

from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

from custom_mcar import MountainCar

DEMO_DATA_DIR = "mcar-out"
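# JsonWriter drops newline-delimited JSON files of SampleBatches into this
# directory; an RLlib trainer can later consume them through the offline API
# (e.g. a config containing {"input": DEMO_DATA_DIR}).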


def key_press(key, mod):
    # Pyglet keyboard handler: number keys choose the next action,
    # Enter requests a restart, and spacebar toggles pause.
    global human_agent_action, human_wants_restart, human_sets_pause
    if key == 0xFF0D:  # Enter
        human_wants_restart = True
    if key == 32:  # spacebar
        human_sets_pause = not human_sets_pause
    a = int(key - ord("0"))
    if a <= 0 or a >= ACTIONS:  # ignore keys outside 1..ACTIONS-1
        return
    human_agent_action = a


def key_release(key, mod):
    global human_agent_action
    a = int(key - ord("0"))
    if a <= 0 or a >= ACTIONS:
        return
    if human_agent_action == a:  # revert to the no-op action on release
        human_agent_action = 0


def rollout(env, eps_id):
    """Run one human-controlled episode and write it out as a SampleBatch."""
    global human_agent_action, human_wants_restart, human_sets_pause
    human_wants_restart = False
    obs = env.reset()
    prev_action = np.zeros_like(env.action_space.sample())
    prev_reward = 0
    t = 0
    skip = 0
    total_reward = 0
    total_timesteps = 0
    while True:
        if not skip:
            print("taking action {}".format(human_agent_action))
            a = human_agent_action
            total_timesteps += 1
            skip = SKIP_CONTROL
        else:
            skip -= 1

        new_obs, r, done, info = env.step(a)
        # Record the transition in the episode batch
        batch_builder.add_values(
            t=t,
            eps_id=eps_id,
            agent_index=0,
            obs=prep.transform(obs),
            actions=a,
            action_prob=1.0,  # put the true action probability here
            action_logp=0.0,
            action_dist_inputs=None,
            rewards=r,
            prev_actions=prev_action,
            prev_rewards=prev_reward,
            dones=done,
            infos=info,
            new_obs=prep.transform(new_obs),
        )
        obs = new_obs
        prev_action = a
        prev_reward = r
        t += 1  # advance the within-episode timestep counter

        if r != 0:
            print("reward %0.3f" % r)
        total_reward += r
        window_still_open = env.wrapped.render()
        if window_still_open is False:
            return False
        if done:
            break
        if human_wants_restart:
            break
        while human_sets_pause:
            env.wrapped.render()
            time.sleep(0.1)
        time.sleep(0.1)
    print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
    writer.write(batch_builder.build_and_reset())
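
# Note: SampleBatchBuilder.add_values accumulates one row per call;
# build_and_reset() then packs all rows of the episode into a single
# SampleBatch and clears the builder for the next episode.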


if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
    writer = JsonWriter(DEMO_DATA_DIR)

    env = MountainCar()

    # RLlib uses preprocessors to implement transforms such as one-hot
    # encoding and flattening of tuple and dict observations. For a simple
    # Box observation like MountainCar's this is effectively a no-op, but it
    # keeps the recorded data consistent with what RLlib expects.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)
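    # Optional sanity check (a sketch, not in the original script): for a Box
    # observation space, prep.transform should return an array matching the
    # preprocessor's output shape, e.g.:
    #   assert prep.transform(env.reset()).shape == tuple(prep.shape)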

    if not hasattr(env.action_space, "n"):
        raise Exception("Keyboard agent only supports discrete action spaces")
    ACTIONS = env.action_space.n
    # Repeat the previous control decision SKIP_CONTROL extra times; useful
    # for testing how much frame-skipping the task can tolerate.
    SKIP_CONTROL = 0

    human_agent_action = 0
    human_wants_restart = False
    human_sets_pause = False

    env.reset()
    env.wrapped.render()
    # Hook the keyboard handlers into the pyglet window of the underlying
    # classic-control viewer.
    env.wrapped.unwrapped.viewer.window.on_key_press = key_press
    env.wrapped.unwrapped.viewer.window.on_key_release = key_release

    print("ACTIONS={}".format(ACTIONS))
    print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
    print("No keys pressed is taking action 0")

    # Record up to 20 human-played episodes (stop early if the window closes).
    for i in range(20):
        window_still_open = rollout(env, i)
        if window_still_open is False:
            break
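    # To sanity-check the recorded demonstrations afterwards, they could be
    # read back with RLlib's JsonReader (a sketch under the same RLlib
    # version, not part of the original script):
    #
    #   from ray.rllib.offline.json_reader import JsonReader
    #   reader = JsonReader(DEMO_DATA_DIR)
    #   batch = reader.next()  # one SampleBatch of recorded transitions
    #   print(batch["actions"])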