Add benchy support for per-VM environment variables

Summary: Adds support for specifying a different set of environment variables for each virtual machine being benchmarked. This makes it easier to tune various constants in the VM, and it reduces overall build time when doing performance comparisons within a single branch: the same branch can be listed several times with different environment variables, and it is built only once.

Reviewed By: @jdelong

Differential Revision: D1623580
wdmchaft · Oct 18, 2014 · 362a441
1 parent 6be2875
commit 362a441
Show file tree

Hide file tree

Showing 2 changed files with 85 additions and 20 deletions.
diff --git a/hphp/tools/benchy/benchy.py b/hphp/tools/benchy/benchy.py
@@ -16,6 +16,7 @@
 from __future__ import unicode_literals
 import argparse
 import os
+import re
 import shlex
 import subprocess
 
@@ -52,9 +53,10 @@ def set_verbose_level(level):
 
 class Branch(object):
     """A branch within a repository, i.e. the basic unit of comparison."""
-    def __init__(self, name):
+    def __init__(self, name, env):
         self.name = name
         self.uid = _unique_id()
+        self.env = env
 
     def build_dir(self):
         """Returns the build directory for this branch.
@@ -69,17 +71,43 @@ def root_dir(self):
         """
         return os.path.join(self.build_dir(), config.BUILD_INTERNAL_PATH)
 
+    def result_file(self):
+        return "{0.name}.{0.uid}".format(self)
+
+    def format(self):
+        if len(self.env) > 0:
+            return "{0.name}:{0.env}:{1}".format(self, self.root_dir())
+        else:
+            return "{0.name}:{1}".format(self, self.root_dir())
+Branch.pattern = r'([^:]+)((?::[^:]+)+)?'
+
 
 def parse_branches(raw_branches):
     """Maps branch names and to Branch objects.
 
     """
     branches = []
     for raw_branch in raw_branches:
-        branches.append(Branch(raw_branch))
+        result = re.match(Branch.pattern, raw_branch)
+        if result is None:
+            raise RuntimeError("Invalid branch format: %s" % raw_branch)
+        name = result.group(1)
+        env = '' if result.group(2) is None else result.group(2)[1:]
+        branches.append(Branch(name, env))
     return branches
 
 
+def unique_branches(branches):
+    result = []
+    seen = set()
+    for branch in branches:
+        if branch.name in seen:
+            continue
+        seen.add(branch.name)
+        result.append(branch)
+    return result
+
+
 def run_command(cmd, env=None, stdout=None):
     """Runs a command and checks the return code for errors.
 
@@ -94,7 +122,7 @@ def build_branches(branches):
     """Builds each of the branches into their own directories.
 
     """
-    for branch in branches:
+    for branch in unique_branches(branches):
         build_dir = branch.build_dir()
         if os.path.isfile(build_dir):
             os.remove(build_dir)
@@ -116,7 +144,7 @@ def run_benchmarks(suites, benchmarks, run_perf, inner, outer, branches):
     perf_str = '--perf' if run_perf else ''
     inner_str = '' if inner is None else '--inner {0}'.format(inner)
     outer_str = '' if outer is None else '--outer {0}'.format(outer)
-    branch_str = ' '.join(["%s:%s" % (b.name, b.root_dir()) for b in branches])
+    branch_str = ' '.join([b.format() for b in branches])
 
     command = "{harness} {suites} {benchmarks} {perf} {inner} {outer} {branch}"
     run_command(command.format(harness=benchy_path,
@@ -140,7 +168,7 @@ def process_results(branches, output_mode):
     for branch in branches:
         counter += 1
         runlog = os.path.join(config.WORK_DIR, "runlog.%d" % counter)
-        result_path = os.path.join(config.WORK_DIR, branch.name)
+        result_path = os.path.join(config.WORK_DIR, branch.result_file())
         with open(result_path, 'w') as result_file:
             cmd = "{anymean} --geomean {runlog}"
             run_command(cmd.format(anymean=anymean, runlog=runlog),
@@ -172,8 +200,12 @@ def main():
     parser.add_argument('--outer', action='store', type=int,
                         help='Number of instances of the VM to run for each '
                              'benchmark')
-    parser.add_argument('branch', nargs='+', type=str, metavar='BRANCH',
-                        help='Branch to benchmark')
+    parser.add_argument('branch', nargs='+', type=str,
+                        metavar=r'BRANCH',
+                        help='Branch to benchmark. Can also add a colon-'
+                             'separated list of environment variables to set '
+                             'when benchmarking this branch. E.g. '
+                             'BRANCH:VAR1=VAL1:VAR2=VAL2')
     parser.add_argument('--remarkup', action='store_const', const=True,
                         default=False, help='Spit out the results as Remarkup')
     parser.add_argument('--perf', action='store_const', const=True,

diff --git a/hphp/tools/benchy/benchy_harness.py b/hphp/tools/benchy/benchy_harness.py
@@ -108,16 +108,25 @@ class VirtualMachine(object):
     """A single named executable with which to run benchmarks and measure.
 
     """
-    def __init__(self, name, path):
+    def __init__(self, name, path, env):
         self.name = name
         self.path = path
         self.uid = _unique_id()
+        self.env = env
 
     def __str__(self):
         return "{0.name}".format(self)
 
     def __repr__(self):
         return self.__str__()
+VirtualMachine.pattern = r'(?P<name>[^:]+)(?P<env_path>(?::[^:]+)+)'
+
+
+def parse_env(env_list):
+    """Parses a series of environment variable assignment clauses.
+
+    """
+    return dict([x.split('=') for x in env_list])
 
 
 def load_benchmark_suites():
@@ -203,6 +212,27 @@ def warmup_lines_to_chop(benchmark, warmup):
     return lines_to_chop
 
 
+def set_env(env):
+    """Returns a series of lines to set all the environment variables to their
+    corresponding values in env.
+
+    """
+    lines = []
+    for key, value in env.iteritems():
+        lines.append("export {0}={1}".format(key, value))
+    return '\n'.join(lines)
+
+
+def unset_env(env):
+    """Returns a series of lines to unset all the environment variables in env.
+
+    """
+    lines = []
+    for key, _ in env.iteritems():
+        lines.append("unset {0}".format(key))
+    return '\n'.join(lines)
+
+
 def single_run(**kwargs):
     """Generates the necessary shell-fu for a single benchmark invocation.
 
@@ -214,7 +244,9 @@ def single_run(**kwargs):
     printf "include 'util.php';\\n" >> {include}
     printf "include '{bench.path}';\\n" >> {include}
     printf "QueueRuns({extra_iters}, \\${bench.name});\\n" >> {include}
+    {setenv}
     {wrapper} --compile --build-root={vm.path} {perf} -- {harness} > {tmp}
+    {unsetenv}
     cat {tmp} | tail -n +{lines_to_chop} >> {runlog}
     """
     lines = template.format(**kwargs).split('\n')[1:-1]
@@ -235,7 +267,7 @@ def generate_runscript(vms, benchmarks_to_run, run_perf, warmup, inner, outer):
                 final_runlist.append((virtual_machine, benchmark))
     random.shuffle(final_runlist)
 
-    lines = []
+    lines = ['set -e']
     for i in range(len(final_runlist)):
         virtual_machine, benchmark = final_runlist[i]
         runlog_path = config.RUNLOG_PATH + ('.{0.uid}'.format(virtual_machine))
@@ -255,7 +287,9 @@ def generate_runscript(vms, benchmarks_to_run, run_perf, warmup, inner, outer):
             include=config.INCLUDE_PATH,
             wrapper=config.WRAPPER_PATH,
             harness=config.BENCH_ENTRY_PATH,
-            tmp=config.TMP_PATH))
+            tmp=config.TMP_PATH,
+            setenv=set_env(virtual_machine.env),
+            unsetenv=unset_env(virtual_machine.env)))
     lines.append("printf '\\a\\n'")
 
     with open(config.RUNSCRIPT_PATH, 'w') as runscript:
@@ -279,19 +313,15 @@ def parse_virtual_machines(raw_vms):
 
     """
     vms = []
-    vm_pattern = r'(?:(.*):)?(.*)'
-    counter = 0
     for raw_vm in raw_vms:
-        counter += 1
-        result = re.match(vm_pattern, raw_vm)
+        result = re.match(VirtualMachine.pattern, raw_vm)
         if result is None:
             raise RuntimeError("Invalid format for VM: %s" % raw_vm)
-        name = result.group(1)
-        path = str(result.group(2))
+        name = result.group('name')
+        env_path = result.group('env_path').split(':')[1:]
+        env, path = parse_env(env_path[:-1]), str(env_path[-1])
 
-        if name is None:
-            name = "VM #%d" % counter
-        vms.append(VirtualMachine(name, path))
+        vms.append(VirtualMachine(name, path, env))
     return vms
 
 
@@ -319,7 +349,10 @@ def main():
     parser.add_argument('--warmup', action='store', type=int, default=1,
                         help='Number of inner iterations to warmup the VM.')
     parser.add_argument('vm', nargs='+', type=str, metavar='VM',
-                        help='VM to benchmark')
+                        help='VM to benchmark. Consists of NAME:PATH. Can also '
+                             'add a colon-separated list of environment '
+                             'variables to set when benchmarking this VM. '
+                             'E.g. NAME:VAR1=VAL1:VAR2=VAL2:PATH')
     args = parser.parse_args()
 
     setup_workdir()