run_wml_tests

#!/usr/bin/env python3
# encoding: utf-8
"""
This script runs a sequence of wml unit test scenarios.
"""

import argparse, enum, os, re, subprocess, sys

class Verbosity(enum.IntEnum):
    """Thresholds that the number of -v arguments on the command line is compared against.

    enum.unique is deliberately not applied: more than one of these names could
    share the same int level.
    """
    # Show progress when running interactively, with a new line each time a
    # new batch starts.
    NAMES_OF_TESTS_RUN = 1

    # Print the command lines needed to run Wesnoth directly for a given test.
    SCRIPT_DEBUGGING = 2

    # Print the output even when tests give the expected result.
    OUTPUT_OF_PASSING_TESTS = 3

class UnexpectedTestStatusException(Exception):
    """Signals that a unit test did not finish with the result that was expected of it."""

class UnitTestResult(enum.Enum):
    """Mirror of the unit_test_result enum in game_launcher.hpp.

    The values are the codes that the test schedule and Wesnoth's return
    code are converted from.
    """
    PASS = 0
    FAIL = 1
    TIMEOUT = 2
    FAIL_LOADING_REPLAY = 3
    FAIL_PLAYING_REPLAY = 4
    FAIL_BROKE_STRICT = 5
    FAIL_WML_EXCEPTION = 6
    FAIL_BY_DEFEAT = 7
    PASS_BY_VICTORY = 8

    def __str__(self):
        """Show both the numeric code and the symbolic name, for example '0 PASS'."""
        pieces = (str(self.value), self.name)
        return ' '.join(pieces)

class TestCase:
    """One entry of the wml_test_schedule: the expected status plus the scenario name."""
    def __init__(self, status, name):
        self.name = name
        self.status = status

    def __str__(self):
        """Readable form used for debugging output."""
        template = "TestCase<{status}, {name}>"
        return template.format(name=self.name, status=self.status)

class TestResultAccumulator:
    """Collects the outcome of every batch so that a summary can be printed at the end.

    Each of the four lists holds the test objects (anything with a .name attribute)
    of the batches that ended in that way. The lists are created per instance;
    as class attributes they would be shared by every accumulator, because the
    in-place += used to add batches mutates the shared list.
    """
    def __init__(self, total):
        """total is the number of tests that are expected to be accounted for."""
        self.total = total
        self.passed = []
        self.skipped = []
        self.failed = []
        self.crashed = []

    def pass_test(self, batch):
        """These tests had the expected result - passed if UnitTestResult.PASS was expected, etc."""
        self.passed.extend(batch)

    def skip_test(self, batch):
        """These tests were not run at all. For example, if they expect UnitTestResult.TIMEOUT but the run requested no timeouts."""
        self.skipped.extend(batch)

    def fail_test(self, batch):
        """These tests did not crash, but did not have the expected result - they passed if UnitTestResult.FAIL was expected, etc."""
        self.failed.extend(batch)

    def crash_test(self, batch):
        """Segfaults and similar unusual exits from the program under test."""
        self.crashed.extend(batch)

    def __bool__(self):
        """Returns true if everything that was expected to run (based on the command line options) ran and passed."""
        # The logic here is redundant, from the length of passed and skipped we should know that failed and crashed are both empty;
        # however a false everything-passed result is much worse than a couple of extra checks here.
        return (len(self.passed) + len(self.skipped) == self.total
                and not self.failed
                and not self.crashed)

    def __str__(self):
        """Multi-line summary: the pass count, followed by one line for each non-empty category."""
        result = "Passed {count} of {total} tests\n".format(count=len(self.passed), total=self.total)
        if self.skipped:
            result += "Skipped batches containing {count} tests: ({names})\n".format(count=len(self.skipped),
                    names=", ".join(test.name for test in self.skipped))
        if self.failed:
            result += "Failed batches containing {count} tests: ({names})\n".format(count=len(self.failed),
                    names=", ".join(test.name for test in self.failed))
        if self.crashed:
            result += "Crashed during batches containing {count} tests: ({names})".format(count=len(self.crashed),
                    names=", ".join(test.name for test in self.crashed))
        return result

class TestListParser:
    """Each line in the list of tests should be formatted:
        <expected return code><space><name of unit test scenario>

    For example:
        0 test_functionality

    Lines beginning # are treated as comments. Blank lines are ignored.
    """
    def __init__(self, options):
        """options must provide .verbose (an int level) and .list (the filename of the test list)."""
        self.verbose = options.verbose
        self.filename = options.list

    def get(self, batcher):
        """Read the test list and hand it to batcher.

        batcher is called with the list of TestCase objects. The return value is a
        tuple of whatever batcher returned and a TestResultAccumulator sized for the
        number of tests that were read.

        Raises ValueError if a line is neither blank, a comment, nor a valid test
        entry. Continuing past such a line could silently drop a test from the run.
        The UnitTestResult lookup also raises ValueError for a status number that
        isn't a known result.
        """
        status_name_re = re.compile(r"^(\d+) ([\w-]+)$")
        test_list = []
        # The with-statement makes sure the file is closed, even if parsing fails part way through.
        with open(self.filename, mode="rt") as schedule:
            for line_number, line in enumerate(schedule, start=1):
                line = line.strip()
                if line == "" or line.startswith("#"):
                    continue

                x = status_name_re.match(line)
                if x is None:
                    raise ValueError("Could not parse test list file {filename}, line {number}: {line}".format(
                        filename=self.filename, number=line_number, line=line))

                status, name = x.groups()
                t = TestCase(UnitTestResult(int(status)), name)
                if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
                    print(t)
                test_list.append(t)
        return batcher(test_list), TestResultAccumulator(len(test_list))

def run_with_rerun_for_sdl_video(args, timeout, max_retries=10):
    """A wrapper for subprocess.run with a workaround for the issue of travis+18.04
    intermittently failing to initialise SDL.

    args is the command line to run, and timeout is passed straight to subprocess.run
    (None means no timeout). stderr is merged into stdout, and the output is captured
    as bytes in the returned CompletedProcess's stdout.

    If the output contains the SDL_video initialisation error, then the command is
    run again, for up to max_retries attempts in total (at least one attempt is always
    made). If every attempt hits the error, then the result of the last attempt is
    returned, so that the caller sees the failing return code and output instead of None.

    subprocess.TimeoutExpired from subprocess.run is not caught here.
    """
    # Sanity check on the number of retries. It's a rare failure, a single retry would probably
    # be enough.
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        res = subprocess.run(args, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if b"Could not initialize SDL_video" not in res.stdout:
            return res
        print("Could not initialise SDL_video error, attempt", attempt)
    return res

class WesnothRunner:
    """Launches Wesnoth to run batches of unit test scenarios, and records the outcomes.

    The constructor works out the path of the binary and the arguments that are
    shared by every launch; run_tests then starts one Wesnoth process per batch.
    """
    def __init__(self, options):
        """Build the common command line from the parsed command-line options.

        options is expected to provide: verbose, path, debug_bin, strict_mode, clean,
        additional_arg, timeout, batch_timeout and backtrace.

        Raises FileNotFoundError or RuntimeError if the path is 'xcode' and there
        isn't exactly one matching Xcode build directory.
        """
        self.verbose = options.verbose
        # run_tests needs these two settings; they are kept on the instance so that
        # it doesn't depend on a module-level 'options' variable existing.
        self.strict_mode = options.strict_mode
        self.backtrace = options.backtrace
        if options.path in ["XCode", "xcode", "Xcode"]:
            import glob
            build = "Debug" if options.debug_bin else "Release"
            pattern = os.path.join("~/Library/Developer/XCode/DerivedData/The_Battle_for_Wesnoth*/Build/Products",
                build, "The Battle for Wesnoth.app/Contents/MacOS/The Battle for Wesnoth")
            path_list = glob.glob(os.path.expanduser(pattern))
            if len(path_list) == 0:
                raise FileNotFoundError("Couldn't find your xcode build dir")
            if len(path_list) > 1:
                # seems better than choosing one at random
                raise RuntimeError("Found more than one xcode build dir")
            path = path_list[0]
        else:
            if options.path is None:
                # Default to the directory that contains this script
                path = os.path.dirname(os.path.realpath(sys.argv[0]))
            else:
                path = options.path
            if os.path.isdir(path):
                path = os.path.join(path, "wesnoth")
            if options.debug_bin:
                path += "-debug"
        self.common_args = [path, "--nobanner"]
        if os.name == 'nt':
            self.common_args.append("--wnoconsole")
            self.common_args.append("--wnoredirect")
        if self.strict_mode:
            self.common_args.append("--log-strict=warning")
        if options.clean:
            self.common_args.append("--noaddons")
        if options.additional_arg is not None:
            self.common_args.extend(options.additional_arg)
        self.timeout = options.timeout
        self.batch_timeout = options.batch_timeout
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print("Options that will be used for all Wesnoth instances:", repr(self.common_args))

    def run_tests(self, test_list, test_summary):
        """Run all of the tests in a single instance of Wesnoth

        test_list is a non-empty list of TestCase objects. A batch of more than one
        test is only supported when every test expects UnitTestResult.PASS. The
        outcome of the whole batch is recorded in test_summary.

        Raises ValueError for an empty batch, NotImplementedError for a batch of
        several tests where one expects a non-PASS status, and
        UnexpectedTestStatusException (after recording the failure or crash in
        test_summary) if the batch did not give the expected result.
        """
        if len(test_list) == 0:
            raise ValueError("Running an empty test list")
        if len(test_list) > 1:
            if any(test.status != UnitTestResult.PASS for test in test_list):
                raise NotImplementedError("run_tests doesn't yet support batching tests with non-zero statuses")
        expected_result = test_list[0].status

        if expected_result == UnitTestResult.TIMEOUT and self.timeout == 0:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because timeout is disabled')
            return
        if expected_result == UnitTestResult.FAIL_BROKE_STRICT and not self.strict_mode:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because strict mode is disabled')
            return

        args = self.common_args.copy()
        for test in test_list:
            args.append("-u")
            args.append(test.name)

        # A timeout of zero on the command line means no timeout at all
        if self.timeout == 0:
            timeout = None
        elif len(test_list) == 1:
            timeout = self.timeout
        else:
            timeout = self.batch_timeout

        if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
            if len(test_list) == 1:
                print("Running test", test_list[0].name)
            else:
                print("Running {count} tests ({names})".format(count=len(test_list),
                    names=", ".join([test.name for test in test_list])))
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print(repr(args))

        try:
            res = run_with_rerun_for_sdl_video(args, timeout)
        except subprocess.TimeoutExpired as t:
            if expected_result != UnitTestResult.TIMEOUT:
                print("Timed out (killed by Python timeout implementation), while running batch ({names})".format(
                    names=", ".join([test.name for test in test_list])))
            # Treat Python's timeout the same as Wesnoth returning UnitTestResult.TIMEOUT
            res = subprocess.CompletedProcess(args, UnitTestResult.TIMEOUT.value, t.output or b'')
        if self.verbose >= Verbosity.OUTPUT_OF_PASSING_TESTS:
            # errors='replace' so that invalid UTF-8 in the output can't abort the test run
            print(res.stdout.decode('utf-8', errors='replace'))
            print("Result:", res.returncode)
        if res.returncode < 0:
            print("Wesnoth exited because of signal", -res.returncode)
            if self.backtrace:
                print("Launching GDB for a backtrace...")
                gdb_args = ["gdb", "-q", "-batch", "-ex", "start", "-ex", "continue", "-ex", "bt", "-ex", "quit", "--args"]
                gdb_args.extend(args)
                try:
                    subprocess.run(gdb_args, timeout=240)
                except (OSError, subprocess.TimeoutExpired) as e:
                    # The backtrace is only a debugging aid, the crash still needs to be recorded
                    print("Could not get a backtrace from GDB:", e)
            test_summary.crash_test(test_list)
            raise UnexpectedTestStatusException()
        try:
            returned_result = UnitTestResult(res.returncode)
        except ValueError:
            # The return code isn't one of the values in UnitTestResult
            print("Wesnoth returned an unexpected value: ", res.returncode)
            test_summary.crash_test(test_list)
            raise UnexpectedTestStatusException()
        if returned_result == expected_result:
            test_summary.pass_test(test_list)
        else:
            if self.verbose < Verbosity.OUTPUT_OF_PASSING_TESTS:
                # Batch mode only supports tests that should UnitTestResult.PASS, so the output
                # here is likely to have many 'PASS TEST' lines with the failing test last.
                # If support for batching other UnitTestResults is added then this needs to filter
                # the output from the other tests.
                print(res.stdout.decode('utf-8', errors='replace'))
            print("Failure, Wesnoth returned", returned_result, "but we expected", expected_result)
            test_summary.fail_test(test_list)
            raise UnexpectedTestStatusException()

def test_batcher(test_list, batch_max=None):
    """A generator function that collects tests into batches which a single
    instance of Wesnoth can run.

    Tests that expect anything other than UnitTestResult.PASS are yielded first,
    each in a batch of its own. The tests that expect UnitTestResult.PASS are then
    yielded in batches of at most batch_max tests.

    batch_max defaults to the module-level options.batch_max. Zero means no
    limit. Negative values are also treated as no limit, because slicing with a
    negative size would drop tests and then yield empty batches forever.
    """
    expected_to_pass = []
    for test in test_list:
        if test.status == UnitTestResult.PASS:
            expected_to_pass.append(test)
        else:
            yield [test]
    if not expected_to_pass:
        return
    # Only look at the global options when there's something to batch
    limit = options.batch_max if batch_max is None else batch_max
    if not limit or limit < 0:
        yield expected_to_pass
        return
    for start in range(0, len(expected_to_pass), limit):
        yield expected_to_pass[start:start + limit]

def test_nobatcher(test_list):
    """Generator with the same calling convention as test_batcher, except that
    every test is emitted in a single-element batch of its own."""
    yield from ([single_test] for single_test in test_list)

if __name__ == '__main__':
    # Script entry point: parse the command line, read the list of tests, run each
    # batch, print a summary, and exit with status 1 unless everything that was
    # expected to run gave the expected result.
    ap = argparse.ArgumentParser()
    # The options that are mandatory to support (because they're used in the Travis script)
    # are the one-letter forms of verbose, clean, timeout and backtrace.
    ap.add_argument("-v", "--verbose", action="count", default=0,
        help="Verbose mode. Additional -v arguments increase the verbosity.")
    ap.add_argument("-c", "--clean", action="store_true",
        help="Clean mode. (Don't load any add-ons. Used for mainline tests.)")
    ap.add_argument("-a", "--additional_arg", action="append",
        help="Additional arguments to go to wesnoth. For options that start with a hyphen, '--additional_arg --data-dir' will give an error, use '--additional_arg=--data-dir' instead.")
    ap.add_argument("-t", "--timeout", type=int, default=10,
        help="New timer value to use, instead of 10s as default. The value 0 means no timer, and also skips tests that expect timeout.")
    ap.add_argument("-bt", "--batch-timeout", type=int, default=300,
        help="New timer value to use for batched tests, instead of 300s as default.")
    ap.add_argument("-bm", "--batch-max", type=int, default=0,
        help="Maximum number of tests to do in a batch. Default no limit.")
    # Shares its destination with --batch-max, so it's the same as asking for batches of one test
    ap.add_argument("-bd", "--batch-disable", action="store_const", const=1, dest='batch_max',
        help="Disable test batching, may be useful if debugging a small subset of tests. Equivalent to --batch-max=1")
    ap.add_argument("-s", "--no-strict", dest="strict_mode", action="store_false",
        help="Disable strict mode. By default, we run wesnoth with the option --log-strict=warning to ensure errors result in a failed test.")
    ap.add_argument("-d", "--debug_bin", action="store_true",
        help="Run wesnoth-debug binary instead of wesnoth.")
    ap.add_argument("-g", "--backtrace", action="store_true",
        help="If we encounter a crash, generate a backtrace using gdb. Must have gdb installed for this option.")
    ap.add_argument("-p", "--path", metavar="dir",
        help="Path to wesnoth binary. By default assume it is with this script."
        + " If running on a Mac, passing 'xcode' as the path will attempt to locate the binary in Xcode derived data directories.")
    ap.add_argument("-l", "--list", metavar="filename",
        help="Loads list of tests from the given file.",
        default="wml_test_schedule")

    # Workaround for argparse not accepting option values that start with a hyphen,
    # for example "-a --user-data-dir". https://bugs.python.org/issue9334
    # New callers can use "-a=--user-data-dir", but compatibility with the old version
    # of run_wml_tests needs support for "-a --user-data-dir".
    try:
        while True:
            i = sys.argv.index("-a")
            sys.argv[i] = "=".join(["-a", sys.argv.pop(i + 1)])
    except (IndexError, ValueError):
        # ValueError: there are no more "-a" arguments to rewrite.
        # IndexError: "-a" was the last argument, leave it for argparse to report.
        pass

    # This needs to stay a module-level name, because other code in this file
    # (for example test_batcher) reads it as a global.
    options = ap.parse_args()

    if options.verbose > 1:
        print(repr(options))

    batcher = test_nobatcher if options.batch_max == 1 else test_batcher
    test_list, test_summary = TestListParser(options).get(batcher)
    runner = WesnothRunner(options)

    for batch in test_list:
        try:
            runner.run_tests(batch, test_summary)
        except UnexpectedTestStatusException:
            # The failure has already been recorded in test_summary, carry on with the
            # next batch. The check is a safeguard against reporting a false success.
            if test_summary:
                raise RuntimeError("Logic error in run_wml_tests - a test has failed, but test_summary says everything passed")

    if options.verbose > 0 or not test_summary:
        print(test_summary)
    else:
        print("WML Unit Tests Result:", len(test_summary.passed), "of", test_summary.total, "tests passed")

    if not test_summary:
        sys.exit(1)