Skip to content

Commit

Permalink
Add benchy support for per-VM environment variables
Browse files Browse the repository at this point in the history
Summary: Adds support for specifying different environment variables
for each virtual machine. This will allow for easier tuning of various
constants in the VM as well as reduce overall build times when doing
performance comparisons with a single branch.

Reviewed By: @jdelong

Differential Revision: D1623580
  • Loading branch information
Mark Hahnenberg authored and hhvm-bot committed Oct 18, 2014
1 parent 6be2875 commit 362a441
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 20 deletions.
46 changes: 39 additions & 7 deletions hphp/tools/benchy/benchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from __future__ import unicode_literals
import argparse
import os
import re
import shlex
import subprocess

Expand Down Expand Up @@ -52,9 +53,10 @@ def set_verbose_level(level):

class Branch(object):
"""A branch within a repository, i.e. the basic unit of comparison."""
def __init__(self, name):
def __init__(self, name, env):
self.name = name
self.uid = _unique_id()
self.env = env

def build_dir(self):
"""Returns the build directory for this branch.
Expand All @@ -69,17 +71,43 @@ def root_dir(self):
"""
return os.path.join(self.build_dir(), config.BUILD_INTERNAL_PATH)

def result_file(self):
    """Returns the file name used to store this branch's results.

    The name combines the branch name with its unique id, so two Branch
    objects that share a name get distinct result files.
    """
    file_name = "{0.name}.{0.uid}".format(self)
    return file_name

def format(self):
    """Returns this branch as a colon-separated spec string.

    The result is NAME:ENV:ROOT_DIR when the branch has environment
    variable assignments and NAME:ROOT_DIR when it has none.
    """
    if len(self.env) == 0:
        layout = "{0.name}:{1}"
    else:
        layout = "{0.name}:{0.env}:{1}"
    return layout.format(self, self.root_dir())
Branch.pattern = r'([^:]+)((?::[^:]+)+)?'


def parse_branches(raw_branches):
    """Maps raw branch specifications to Branch objects.

    Each specification is a branch name optionally followed by a
    colon-separated list of environment variable assignments, e.g.
    BRANCH:VAR1=VAL1:VAR2=VAL2. The environment part is stored on the
    Branch as a single string without its leading colon ('' when absent).

    Raises RuntimeError if a specification does not fit that format.
    """
    branches = []
    for raw_branch in raw_branches:
        result = re.match(Branch.pattern, raw_branch)
        # re.match only anchors at the start of the string, so also require
        # that the whole specification was consumed. Otherwise an input such
        # as "name::VAR=VAL" or "name:" would match just "name" and the
        # requested environment would be silently dropped.
        if result is None or result.end() != len(raw_branch):
            raise RuntimeError("Invalid branch format: %s" % raw_branch)
        name = result.group(1)
        # group(2) still carries its leading ':' separator; strip it.
        env = '' if result.group(2) is None else result.group(2)[1:]
        branches.append(Branch(name, env))
    return branches


def unique_branches(branches):
    """Returns the branches with duplicate names removed.

    Only the first branch seen for each name is kept, and the relative
    order of the kept branches matches the input order.
    """
    names_seen = set()
    distinct = []
    for candidate in branches:
        if candidate.name not in names_seen:
            names_seen.add(candidate.name)
            distinct.append(candidate)
    return distinct


def run_command(cmd, env=None, stdout=None):
"""Runs a command and checks the return code for errors.
Expand All @@ -94,7 +122,7 @@ def build_branches(branches):
"""Builds each of the branches into their own directories.
"""
for branch in branches:
for branch in unique_branches(branches):
build_dir = branch.build_dir()
if os.path.isfile(build_dir):
os.remove(build_dir)
Expand All @@ -116,7 +144,7 @@ def run_benchmarks(suites, benchmarks, run_perf, inner, outer, branches):
perf_str = '--perf' if run_perf else ''
inner_str = '' if inner is None else '--inner {0}'.format(inner)
outer_str = '' if outer is None else '--outer {0}'.format(outer)
branch_str = ' '.join(["%s:%s" % (b.name, b.root_dir()) for b in branches])
branch_str = ' '.join([b.format() for b in branches])

command = "{harness} {suites} {benchmarks} {perf} {inner} {outer} {branch}"
run_command(command.format(harness=benchy_path,
Expand All @@ -140,7 +168,7 @@ def process_results(branches, output_mode):
for branch in branches:
counter += 1
runlog = os.path.join(config.WORK_DIR, "runlog.%d" % counter)
result_path = os.path.join(config.WORK_DIR, branch.name)
result_path = os.path.join(config.WORK_DIR, branch.result_file())
with open(result_path, 'w') as result_file:
cmd = "{anymean} --geomean {runlog}"
run_command(cmd.format(anymean=anymean, runlog=runlog),
Expand Down Expand Up @@ -172,8 +200,12 @@ def main():
parser.add_argument('--outer', action='store', type=int,
help='Number of instances of the VM to run for each '
'benchmark')
parser.add_argument('branch', nargs='+', type=str, metavar='BRANCH',
help='Branch to benchmark')
parser.add_argument('branch', nargs='+', type=str,
metavar=r'BRANCH',
help='Branch to benchmark. Can also add a colon-'
'separated list of environment variables to set '
'when benchmarking this branch. E.g. '
'BRANCH:VAR1=VAL1:VAR2=VAL2')
parser.add_argument('--remarkup', action='store_const', const=True,
default=False, help='Spit out the results as Remarkup')
parser.add_argument('--perf', action='store_const', const=True,
Expand Down
59 changes: 46 additions & 13 deletions hphp/tools/benchy/benchy_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,16 +108,25 @@ class VirtualMachine(object):
"""A single named executable with which to run benchmarks and measure.
"""
def __init__(self, name, path):
def __init__(self, name, path, env):
self.name = name
self.path = path
self.uid = _unique_id()
self.env = env

def __str__(self):
return "{0.name}".format(self)

def __repr__(self):
return self.__str__()
VirtualMachine.pattern = r'(?P<name>[^:]+)(?P<env_path>(?::[^:]+)+)'


def parse_env(env_list):
    """Parses a series of environment variable assignment clauses.

    Each clause has the form NAME=VALUE. Only the first '=' separates the
    name from the value, so the value may itself contain '=' characters.

    Returns a dict mapping each variable name to its value.
    Raises ValueError if a clause has no '=' or has an empty name.
    """
    env = {}
    for clause in env_list:
        # partition splits on the first '=' only; a plain split('=') would
        # break values such as "-vEval.Jit=1" into extra pieces.
        name, separator, value = clause.partition('=')
        if not separator or not name:
            raise ValueError(
                "Invalid environment variable assignment: %s" % clause)
        env[name] = value
    return env


def load_benchmark_suites():
Expand Down Expand Up @@ -203,6 +212,27 @@ def warmup_lines_to_chop(benchmark, warmup):
return lines_to_chop


def set_env(env):
    """Returns a series of lines to set all the environment variables to their
    corresponding values in env.

    env is a dict mapping variable names to values. The result is one
    "export NAME=VALUE" line per entry, joined with newlines, or the empty
    string when env is empty. Values are inserted verbatim (no shell
    quoting is applied).
    """
    # dict.items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    return '\n'.join(
        "export {0}={1}".format(key, value) for key, value in env.items())


def unset_env(env):
    """Returns a series of lines to unset all the environment variables in env.

    env is a dict keyed by variable name; its values are ignored. The result
    is one "unset NAME" line per key, joined with newlines, or the empty
    string when env is empty.
    """
    # Iterating the dict yields its keys on both Python 2 and Python 3, so
    # there is no need for the Python 2-only iteritems().
    return '\n'.join("unset {0}".format(key) for key in env)


def single_run(**kwargs):
"""Generates the necessary shell-fu for a single benchmark invocation.
Expand All @@ -214,7 +244,9 @@ def single_run(**kwargs):
printf "include 'util.php';\\n" >> {include}
printf "include '{bench.path}';\\n" >> {include}
printf "QueueRuns({extra_iters}, \\${bench.name});\\n" >> {include}
{setenv}
{wrapper} --compile --build-root={vm.path} {perf} -- {harness} > {tmp}
{unsetenv}
cat {tmp} | tail -n +{lines_to_chop} >> {runlog}
"""
lines = template.format(**kwargs).split('\n')[1:-1]
Expand All @@ -235,7 +267,7 @@ def generate_runscript(vms, benchmarks_to_run, run_perf, warmup, inner, outer):
final_runlist.append((virtual_machine, benchmark))
random.shuffle(final_runlist)

lines = []
lines = ['set -e']
for i in range(len(final_runlist)):
virtual_machine, benchmark = final_runlist[i]
runlog_path = config.RUNLOG_PATH + ('.{0.uid}'.format(virtual_machine))
Expand All @@ -255,7 +287,9 @@ def generate_runscript(vms, benchmarks_to_run, run_perf, warmup, inner, outer):
include=config.INCLUDE_PATH,
wrapper=config.WRAPPER_PATH,
harness=config.BENCH_ENTRY_PATH,
tmp=config.TMP_PATH))
tmp=config.TMP_PATH,
setenv=set_env(virtual_machine.env),
unsetenv=unset_env(virtual_machine.env)))
lines.append("printf '\\a\\n'")

with open(config.RUNSCRIPT_PATH, 'w') as runscript:
Expand All @@ -279,19 +313,15 @@ def parse_virtual_machines(raw_vms):
"""
vms = []
vm_pattern = r'(?:(.*):)?(.*)'
counter = 0
for raw_vm in raw_vms:
counter += 1
result = re.match(vm_pattern, raw_vm)
result = re.match(VirtualMachine.pattern, raw_vm)
if result is None:
raise RuntimeError("Invalid format for VM: %s" % raw_vm)
name = result.group(1)
path = str(result.group(2))
name = result.group('name')
env_path = result.group('env_path').split(':')[1:]
env, path = parse_env(env_path[:-1]), str(env_path[-1])

if name is None:
name = "VM #%d" % counter
vms.append(VirtualMachine(name, path))
vms.append(VirtualMachine(name, path, env))
return vms


Expand Down Expand Up @@ -319,7 +349,10 @@ def main():
parser.add_argument('--warmup', action='store', type=int, default=1,
help='Number of inner iterations to warmup the VM.')
parser.add_argument('vm', nargs='+', type=str, metavar='VM',
help='VM to benchmark')
help='VM to benchmark. Consists of NAME:PATH. Can also '
'add a colon-separated list of environment '
'variables to set when benchmarking this VM. '
'E.g. NAME:VAR1=VAL1:VAR2=VAL2:PATH')
args = parser.parse_args()

setup_workdir()
Expand Down

0 comments on commit 362a441

Please sign in to comment.