Skip to content

Commit

Permalink
Added test for AgentQuitFromReachingPosition handler.
Browse files Browse the repository at this point in the history
  • Loading branch information
DaveyBiggers committed Jun 30, 2016
1 parent c0766f3 commit fa5b016
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 0 deletions.
1 change: 1 addition & 0 deletions Malmo/samples/Python_examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ set( SOURCES
sample_missions_loader.py
tabular_q_learning.py
to_string_test.py
quit_from_reaching_position_test.py
tutorial_1.py
tutorial_2.py
tutorial_3.py
Expand Down
207 changes: 207 additions & 0 deletions Malmo/samples/Python_examples/quit_from_reaching_position_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# ------------------------------------------------------------------------------------------------
# Copyright (c) 2016 Microsoft Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ------------------------------------------------------------------------------------------------

# Tests the AgentQuitFromReachingPosition handler under discrete movement.
# Agent just has to move forward a set number of times.
# A positive reward will be fired for touching the quit square.
# A large negative reward will be fired for touching the *following* square.
# In an ideal world, the agent will send exactly the right number of commands,
# only the first of these rewards will fire, and the mission will end.

# The actual behaviour depends upon the speed at which commands are sent.
# See https://github.com/Microsoft/malmo/issues/104 for some details.

import MalmoPython
import os
import random
import sys
import time
import json
import errno

def GetMissionXML(num):
return '''<?xml version="1.0" encoding="UTF-8" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://ProjectMalmo.microsoft.com Mission.xsd">
<About>
<Summary>Let's run! #''' + str(num) + '''</Summary>
</About>
<ServerSection>
<ServerInitialConditions>
<AllowSpawning>false</AllowSpawning>
</ServerInitialConditions>
<ServerHandlers>
<FlatWorldGenerator generatorString="3;7,220*1,5*168:1,41;3;biome_1" />
<DrawingDecorator>
<DrawCuboid x1="0" y1="226" z1="0" x2="0" y2="226" z2="1000" type="stone" variant="smooth_granite"/>
<DrawBlock x="0" y="226" z="''' + str(PATH_LENGTH) + '''" type="emerald_block"/>
<DrawBlock x="0" y="226" z="''' + str(PATH_LENGTH+1) + '''" type="redstone_block"/>
</DrawingDecorator>
<ServerQuitFromTimeUp timeLimitMs="''' + str(MISSION_LENGTH * 1000) + '''"/>
<ServerQuitWhenAnyAgentFinishes />
</ServerHandlers>
</ServerSection>
<AgentSection mode="Survival">
<Name>Britney</Name>
<AgentStart>
<Placement x="0.5" y="227" z="0.5"/>
</AgentStart>
<AgentHandlers>
<DiscreteMovementCommands/>
<ObservationFromDistance>
<Marker name="Start" x="0.5" y="227" z="0.5"/>
</ObservationFromDistance>
<RewardForReachingPosition>
<Marker oneshot="true" reward="100" tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH + 0.5) + '''"/>
<Marker oneshot="true" reward="-1000" tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH+1.5) + '''"/>
</RewardForReachingPosition>
<AgentQuitFromReachingPosition>
<Marker tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH+0.5) + '''"/>
</AgentQuitFromReachingPosition>
</AgentHandlers>
</AgentSection>
</Mission>'''

sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) # flush print output immediately

validate = True

agent_host = MalmoPython.AgentHost()

agent_host.addOptionalIntArgument( "length,l", "Number of steps required to reach goal square.", 10)
# Eg set the length to 0 or 1 to test https://github.com/Microsoft/malmo/issues/23

agent_host.addOptionalFlag( "stop,s", "Stop after required number of steps.")
# Eg if length is set to 10, will send 10 move commands and then wait for the mission to end.
# This can be used to test that commands are all being acted on, regardless of the speed they are sent at,
# and can give some indication of the latency between sending the final command and receiving the mission ended message.

agent_host.addOptionalFloatArgument( "wait,w", "Number of seconds to wait between sending commands.", 0.1)
# Setting this to something slow (eg 0.1) should show a clear cycle of commands/observations/rewards,
# and a quit triggered after the correct number of commands.
# Setting this to something faster (eg 0.05) should still show a clear cylce of commands/observations/rewards,
# but there may be extra commands sent unnecessarily at the end (which shouldn't be acted on).
# Setting this to something extreme (eg 0.01) should show behaviour whereby the commands get clustered together,
# and the agent may overshoot the goal square entirely - potentially without triggering the quit.

try:
agent_host.parse( sys.argv )
except RuntimeError as e:
print 'ERROR:',e
print agent_host.getUsage()
exit(1)
if agent_host.receivedArgument("help"):
print agent_host.getUsage()
exit(0)

PATH_LENGTH = agent_host.getIntArgument("length")
STOP = agent_host.receivedArgument("stop")
WAIT_TIME = agent_host.getFloatArgument("wait")
MISSION_LENGTH = 30
NUM_REPEATS = 10

if agent_host.receivedArgument("test"):
print "Using test setings (overrides other command-line arguments)."
NUM_REPEATS = 1
WAIT_TIME = 0.2
STOP = False
PATH_LENGTH = 20

agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)

recordingsDirectory="QuitFromReachingPosition_Recordings"

try:
os.makedirs(recordingsDirectory)
except OSError as exception:
if exception.errno != errno.EEXIST: # ignore error if already existed
raise

for iRepeat in xrange(NUM_REPEATS):
my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat), validate)
# Set up a recording - MUST be done once for each mission - don't do this outside the loop!
my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory + "//QuitFromReachingPosition_Test" + str(iRepeat) + ".tgz");
my_mission_record.recordRewards()
my_mission_record.recordObservations()
try:
agent_host.startMission( my_mission, my_mission_record )
except RuntimeError as e:
print "Error starting mission:",e
exit(1)

world_state = agent_host.getWorldState()
while not world_state.is_mission_running:
time.sleep(0.01)
sys.stdout.write(".")
world_state = agent_host.getWorldState()
if len(world_state.errors):
print
for error in world_state.errors:
print "Error:",error.text
exit()
print

# main loop:
distance = 0
commands_sent = 0
total_rewards = 0
while world_state.is_mission_running:
if commands_sent < PATH_LENGTH or not STOP:
agent_host.sendCommand("movesouth 1")
commands_sent += 1
sys.stdout.write("C")
time.sleep(WAIT_TIME)
world_state = agent_host.getWorldState()
if world_state.number_of_observations_since_last_state > 0:
for ob in world_state.observations:
jsob = json.loads(ob.text)
distance = jsob.get(u'distanceFromStart', 0)
sys.stdout.write('O{0:.0f}'.format(distance))
if world_state.number_of_rewards_since_last_state > 0:
for rew in world_state.rewards:
if rew.value == 0:
sys.stdout.write("r")
elif rew.value == 100:
sys.stdout.write("R")
elif rew.value == -1000:
sys.stdout.write("*")
else:
sys.stdout.write("?")
total_rewards += rew.value
if world_state.is_mission_running:
sys.stdout.write("T")
else:
sys.stdout.write("F")
sys.stdout.write(" ")
print
print "Mission Ended - sent " + str(commands_sent) + " commands; final reward: " + str(total_rewards)
if total_rewards != 100:
print "ERROR - FAILED TO GET CORRECT REWARD!"
if total_rewards < 0:
print "We overran! Quit producer did not produce a quit quickly enough!"
print

if agent_host.receivedArgument("test"):
if commands_sent != PATH_LENGTH or total_rewards != 100:
print "Number of commands sent, or total rewards received, did not match expectations."
exit(1)
time.sleep(1)

0 comments on commit fa5b016

Please sign in to comment.