Added test for AgentQuitFromReachingPosition handler.

yaro977 · Jun 30, 2016 · fa5b016 · fa5b016
1 parent c0766f3
commit fa5b016
Show file tree

Hide file tree

Showing 2 changed files with 208 additions and 0 deletions.
diff --git a/Malmo/samples/Python_examples/CMakeLists.txt b/Malmo/samples/Python_examples/CMakeLists.txt
@@ -34,6 +34,7 @@ set( SOURCES
   sample_missions_loader.py
   tabular_q_learning.py
   to_string_test.py
+  quit_from_reaching_position_test.py
   tutorial_1.py
   tutorial_2.py
   tutorial_3.py

diff --git a/Malmo/samples/Python_examples/quit_from_reaching_position_test.py b/Malmo/samples/Python_examples/quit_from_reaching_position_test.py
@@ -0,0 +1,207 @@
+# ------------------------------------------------------------------------------------------------
+# Copyright (c) 2016 Microsoft Corporation
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+# associated documentation files (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge, publish, distribute,
+# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all copies or
+# substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# ------------------------------------------------------------------------------------------------
+
+# Tests the AgentQuitFromReachingPosition handler under discrete movement.
+# Agent just has to move forward a set number of times.
+# A positive reward will be fired for touching the quit square.
+# A large negative reward will be fired for touching the *following* square.
+# In an ideal world, the agent will send exactly the right number of commands,
+# only the first of these rewards will fire, and the mission will end.
+
+# The actual behaviour depends upon the speed at which commands are sent.
+# See https://github.com/Microsoft/malmo/issues/104 for some details.
+
+import MalmoPython
+import os
+import random
+import sys
+import time
+import json
+import errno
+
+def GetMissionXML(num):
+    return '''<?xml version="1.0" encoding="UTF-8" ?>
+    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://ProjectMalmo.microsoft.com Mission.xsd">
+        <About>
+            <Summary>Let's run! #''' + str(num) + '''</Summary>
+        </About>
+
+        <ServerSection>
+            <ServerInitialConditions>
+                <AllowSpawning>false</AllowSpawning>
+            </ServerInitialConditions>
+            <ServerHandlers>
+                <FlatWorldGenerator generatorString="3;7,220*1,5*168:1,41;3;biome_1" />
+                <DrawingDecorator>
+                    <DrawCuboid x1="0" y1="226" z1="0" x2="0" y2="226" z2="1000" type="stone" variant="smooth_granite"/>
+                    <DrawBlock x="0" y="226" z="''' + str(PATH_LENGTH) + '''" type="emerald_block"/>
+                    <DrawBlock x="0" y="226" z="''' + str(PATH_LENGTH+1) + '''" type="redstone_block"/>
+                </DrawingDecorator>
+                <ServerQuitFromTimeUp timeLimitMs="''' + str(MISSION_LENGTH * 1000) + '''"/>
+                <ServerQuitWhenAnyAgentFinishes />
+            </ServerHandlers>
+        </ServerSection>
+
+        <AgentSection mode="Survival">
+            <Name>Britney</Name>
+            <AgentStart>
+                <Placement x="0.5" y="227" z="0.5"/>
+            </AgentStart>
+            <AgentHandlers>
+                <DiscreteMovementCommands/>
+                <ObservationFromDistance>
+                    <Marker name="Start" x="0.5" y="227" z="0.5"/>
+                </ObservationFromDistance>
+                <RewardForReachingPosition>
+                    <Marker oneshot="true" reward="100" tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH + 0.5) + '''"/>
+                    <Marker oneshot="true" reward="-1000" tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH+1.5) + '''"/>
+                </RewardForReachingPosition>      
+                <AgentQuitFromReachingPosition>
+                    <Marker tolerance="0.1" x="0.5" y="227" z="''' + str(PATH_LENGTH+0.5) + '''"/>
+                </AgentQuitFromReachingPosition>
+            </AgentHandlers>
+        </AgentSection>
+
+    </Mission>'''
+
+sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately
+
+validate = True
+
+agent_host = MalmoPython.AgentHost()
+
+agent_host.addOptionalIntArgument( "length,l", "Number of steps required to reach goal square.", 10)
+# Eg set the length to 0 or 1 to test https://github.com/Microsoft/malmo/issues/23
+
+agent_host.addOptionalFlag( "stop,s", "Stop after required number of steps.")
+# Eg if length is set to 10, will send 10 move commands and then wait for the mission to end.
+# This can be used to test that commands are all being acted on, regardless of the speed they are sent at,
+# and can give some indication of the latency between sending the final command and receiving the mission ended message.
+
+agent_host.addOptionalFloatArgument( "wait,w", "Number of seconds to wait between sending commands.", 0.1)
+# Setting this to something slow (eg 0.1) should show a clear cycle of commands/observations/rewards,
+# and a quit triggered after the correct number of commands.
+# Setting this to something faster (eg 0.05) should still show a clear cylce of commands/observations/rewards,
+# but there may be extra commands sent unnecessarily at the end (which shouldn't be acted on).
+# Setting this to something extreme (eg 0.01) should show behaviour whereby the commands get clustered together,
+# and the agent may overshoot the goal square entirely - potentially without triggering the quit.
+
+try:
+    agent_host.parse( sys.argv )
+except RuntimeError as e:
+    print 'ERROR:',e
+    print agent_host.getUsage()
+    exit(1)
+if agent_host.receivedArgument("help"):
+    print agent_host.getUsage()
+    exit(0)
+
+PATH_LENGTH = agent_host.getIntArgument("length")
+STOP = agent_host.receivedArgument("stop")
+WAIT_TIME = agent_host.getFloatArgument("wait")
+MISSION_LENGTH = 30
+NUM_REPEATS = 10
+
+if agent_host.receivedArgument("test"):
+    print "Using test setings (overrides other command-line arguments)."
+    NUM_REPEATS = 1
+    WAIT_TIME = 0.2
+    STOP = False
+    PATH_LENGTH = 20
+
+agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
+agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.KEEP_ALL_REWARDS)
+
+recordingsDirectory="QuitFromReachingPosition_Recordings"
+
+try:
+    os.makedirs(recordingsDirectory)
+except OSError as exception:
+    if exception.errno != errno.EEXIST: # ignore error if already existed
+        raise
+
+for iRepeat in xrange(NUM_REPEATS):
+    my_mission = MalmoPython.MissionSpec(GetMissionXML(iRepeat), validate)
+    # Set up a recording - MUST be done once for each mission - don't do this outside the loop!
+    my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory + "//QuitFromReachingPosition_Test" + str(iRepeat) + ".tgz");
+    my_mission_record.recordRewards()
+    my_mission_record.recordObservations()
+    try:
+        agent_host.startMission( my_mission, my_mission_record )
+    except RuntimeError as e:
+        print "Error starting mission:",e
+        exit(1)
+
+    world_state = agent_host.getWorldState()
+    while not world_state.is_mission_running:
+        time.sleep(0.01)
+        sys.stdout.write(".")
+        world_state = agent_host.getWorldState()
+        if len(world_state.errors):
+            print
+            for error in world_state.errors:
+                print "Error:",error.text
+                exit()
+    print
+
+    # main loop:
+    distance = 0
+    commands_sent = 0
+    total_rewards = 0
+    while world_state.is_mission_running:
+        if commands_sent < PATH_LENGTH or not STOP:
+            agent_host.sendCommand("movesouth 1")
+            commands_sent += 1
+            sys.stdout.write("C")
+        time.sleep(WAIT_TIME)
+        world_state = agent_host.getWorldState()
+        if world_state.number_of_observations_since_last_state > 0:
+            for ob in world_state.observations:
+                jsob = json.loads(ob.text)
+                distance = jsob.get(u'distanceFromStart', 0)
+                sys.stdout.write('O{0:.0f}'.format(distance))
+        if world_state.number_of_rewards_since_last_state > 0:
+            for rew in world_state.rewards:
+                if rew.value == 0:
+                    sys.stdout.write("r")
+                elif rew.value == 100:
+                    sys.stdout.write("R")
+                elif rew.value == -1000:
+                    sys.stdout.write("*")
+                else:
+                    sys.stdout.write("?")
+                total_rewards += rew.value
+        if world_state.is_mission_running:
+            sys.stdout.write("T")
+        else:
+            sys.stdout.write("F")
+        sys.stdout.write(" ")
+    print
+    print "Mission Ended - sent " + str(commands_sent) + " commands; final reward: " + str(total_rewards)
+    if total_rewards != 100:
+        print "ERROR - FAILED TO GET CORRECT REWARD!"
+        if total_rewards < 0:
+            print "We overran! Quit producer did not produce a quit quickly enough!"
+    print
+
+    if agent_host.receivedArgument("test"):
+        if commands_sent != PATH_LENGTH or total_rewards != 100:
+            print "Number of commands sent, or total rewards received, did not match expectations."
+            exit(1)
+time.sleep(1)