diff --git a/Malmo/samples/Python_examples/CMakeLists.txt b/Malmo/samples/Python_examples/CMakeLists.txt index 3e22d9d5f..1ad4c4bef 100755 --- a/Malmo/samples/Python_examples/CMakeLists.txt +++ b/Malmo/samples/Python_examples/CMakeLists.txt @@ -34,6 +34,7 @@ set( SOURCES sample_missions_loader.py tabular_q_learning.py to_string_test.py + quit_from_reaching_position_test.py tutorial_1.py tutorial_2.py tutorial_3.py diff --git a/Malmo/samples/Python_examples/quit_from_reaching_position_test.py b/Malmo/samples/Python_examples/quit_from_reaching_position_test.py new file mode 100755 index 000000000..e587dde7d --- /dev/null +++ b/Malmo/samples/Python_examples/quit_from_reaching_position_test.py @@ -0,0 +1,207 @@ +# ------------------------------------------------------------------------------------------------ +# Copyright (c) 2016 Microsoft Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +# associated documentation files (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, publish, distribute, +# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or +# substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# ------------------------------------------------------------------------------------------------
+
+# Tests the AgentQuitFromReachingPosition handler under discrete movement.
+# Agent just has to move forward a set number of times.
+# A positive reward will be fired for touching the quit square.
+# A large negative reward will be fired for touching the *following* square.
+# In an ideal world, the agent will send exactly the right number of commands,
+# only the first of these rewards will fire, and the mission will end.
+
+# The actual behaviour depends upon the speed at which commands are sent.
+# See https://github.com/Microsoft/malmo/issues/104 for some details.
+
+import MalmoPython
+import os
+import random
+import sys
+import time
+import json
+import errno
+
+def GetMissionXML(num):
+    """Return the mission definition string for repeat number `num`.
+
+    `num` is converted with str() and embedded in the mission summary text
+    ("Let's run! #<num>"), so each repeat gets a distinguishable mission.
+    """
+    # NOTE(review): the XML markup of this mission definition is not present in this copy
+    # of the patch - only its text nodes survive. Restore the markup from the original
+    # commit before running; as written the string will presumably fail validation.
+    return '''
+
+
+    Let's run! #''' + str(num) + '''
+
+
+
+
+    false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    Britney
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    '''
+
+sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately
+
+validate = True
+
+agent_host = MalmoPython.AgentHost()
+
+agent_host.addOptionalIntArgument( "length,l", "Number of steps required to reach goal square.", 10)
+# Eg set the length to 0 or 1 to test https://github.com/Microsoft/malmo/issues/23
+
+agent_host.addOptionalFlag( "stop,s", "Stop after required number of steps.")
+# Eg if length is set to 10, will send 10 move commands and then wait for the mission to end.
+# This can be used to test that commands are all being acted on, regardless of the speed they are sent at,
+# and can give some indication of the latency between sending the final command and receiving the mission ended message.
+
+agent_host.addOptionalFloatArgument( "wait,w", "Number of seconds to wait between sending commands.", 0.1)
+# Setting this to something slow (eg 0.1) should show a clear cycle of commands/observations/rewards,
+# and a quit triggered after the correct number of commands.
+# Setting this to something faster (eg 0.05) should still show a clear cycle of commands/observations/rewards,
+# but there may be extra commands sent unnecessarily at the end (which shouldn't be acted on).
+# Setting this to something extreme (eg 0.01) should show behaviour whereby the commands get clustered together,
+# and the agent may overshoot the goal square entirely - potentially without triggering the quit.
+
+# sys.exit() is used throughout rather than the exit() builtin, which is only installed by
+# the site module and is intended for interactive use.
+try:
+    agent_host.parse( sys.argv )
+except RuntimeError as e:
+    print 'ERROR:',e
+    print agent_host.getUsage()
+    sys.exit(1)
+if agent_host.receivedArgument("help"):
+    print agent_host.getUsage()
+    sys.exit(0)
+
+PATH_LENGTH = agent_host.getIntArgument("length")
+STOP = agent_host.receivedArgument("stop")
+WAIT_TIME = agent_host.getFloatArgument("wait")
+MISSION_LENGTH = 30
+NUM_REPEATS = 10
+
+if agent_host.receivedArgument("test"):
+    print "Using test setings (overrides other command-line arguments)."
+    NUM_REPEATS = 1
+    WAIT_TIME = 0.2
+    STOP = False
+    PATH_LENGTH = 20
+
+# Keep every observation and reward so that none are dropped between polls of the world state.
+agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
+agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)
+
+recordingsDirectory="QuitFromReachingPosition_Recordings"
+
+try:
+    os.makedirs(recordingsDirectory)
+except OSError as exception:
+    if exception.errno != errno.EEXIST: # ignore error if already existed
+        raise
+
+for iRepeat in xrange(NUM_REPEATS):
+    my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat), validate)
+    # Set up a recording - MUST be done once for each mission - don't do this outside the loop!
+    my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory + "//QuitFromReachingPosition_Test" + str(iRepeat) + ".tgz")
+    my_mission_record.recordRewards()
+    my_mission_record.recordObservations()
+    try:
+        agent_host.startMission( my_mission, my_mission_record )
+    except RuntimeError as e:
+        print "Error starting mission:",e
+        sys.exit(1)
+
+    # Poll until the mission is running, printing a "." for each poll.
+    world_state = agent_host.getWorldState()
+    while not world_state.is_mission_running:
+        time.sleep(0.01)
+        sys.stdout.write(".")
+        world_state = agent_host.getWorldState()
+        if len(world_state.errors):
+            print
+            for error in world_state.errors:
+                print "Error:",error.text
+            # Report every error, then exit with a failure status so that callers
+            # (eg an automated test run) do not mistake a failed start for success.
+            sys.exit(1)
+    print
+
+    # main loop:
+    # Each pass writes a trace of what happened, followed by a space:
+    #   C        - a "movesouth 1" command was sent
+    #   O<n>     - an observation arrived; <n> is its distanceFromStart (0 if absent)
+    #   r/R/*/?  - a reward arrived with value 0 / 100 / -1000 / anything else
+    #   T/F      - whether the mission was still running after this pass
+    distance = 0
+    commands_sent = 0
+    total_rewards = 0
+    while world_state.is_mission_running:
+        # With the "stop" flag, no more than PATH_LENGTH commands are sent;
+        # without it, commands keep being sent until the mission ends.
+        if commands_sent < PATH_LENGTH or not STOP:
+            agent_host.sendCommand("movesouth 1")
+            commands_sent += 1
+            sys.stdout.write("C")
+        time.sleep(WAIT_TIME)
+        world_state = agent_host.getWorldState()
+        if world_state.number_of_observations_since_last_state > 0:
+            for ob in world_state.observations:
+                jsob = json.loads(ob.text)
+                distance = jsob.get(u'distanceFromStart', 0)
+                sys.stdout.write('O{0:.0f}'.format(distance))
+        if world_state.number_of_rewards_since_last_state > 0:
+            for rew in world_state.rewards:
+                if rew.value == 0:
+                    sys.stdout.write("r")
+                elif rew.value == 100:
+                    sys.stdout.write("R")
+                elif rew.value == -1000:
+                    sys.stdout.write("*")
+                else:
+                    sys.stdout.write("?")
+                total_rewards += rew.value
+        if world_state.is_mission_running:
+            sys.stdout.write("T")
+        else:
+            sys.stdout.write("F")
+        sys.stdout.write(" ")
+    print
+    print "Mission Ended - sent " + str(commands_sent) + " commands; final reward: " + str(total_rewards)
+    # A total of exactly 100 means only the positive reward was received.
+    if total_rewards != 100:
+        print "ERROR - FAILED TO GET CORRECT REWARD!"
+        if total_rewards < 0:
+            print "We overran! Quit producer did not produce a quit quickly enough!"
+    print
+
+    # In test mode any deviation from the expected command count or reward is a failure.
+    if agent_host.receivedArgument("test"):
+        if commands_sent != PATH_LENGTH or total_rewards != 100:
+            print "Number of commands sent, or total rewards received, did not match expectations."
+            sys.exit(1)
+time.sleep(1)
\ No newline at end of file