Improve superpmi collection steps (dotnet#652)
Rearrange the "clean" and "remove dups" phases of the collection
steps. This saves a lot of time, since the "clean" phase no longer
JIT-compiles all the functions whose MCs would just be thrown out
as duplicates.

Fixes dotnet#646
BruceForstall authored Dec 9, 2019
1 parent c87438d commit 5f1260b
Showing 3 changed files with 160 additions and 137 deletions.
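
For context, the reordered collection pipeline amounts to running the tools in the order below. This is a minimal sketch in the style of superpmi.py, not the script itself; the run helper, tool paths, and file names are illustrative assumptions, while the commands mirror the ones shown in the diff.

import os
import shutil
import subprocess

# Illustrative paths; superpmi.py derives these from Core_Root and a temp directory.
mcs_path = "mcs"
superpmi_path = "superpmi"
jit_path = "clrjit.dll"
temp_location = "."

base_mch_file = os.path.join(temp_location, "base.mch")        # merged from all the *.mc files
nodup_mch_file = os.path.join(temp_location, "nodup.mch")      # after -removeDup -thin
final_mch_file = os.path.join(temp_location, "final.mch")
base_fail_mcl_file = os.path.join(temp_location, "basefail.mcl")
final_fail_mcl_file = os.path.join(temp_location, "finalfail.mcl")

def run(command):
    print("Invoking: " + " ".join(command))
    subprocess.Popen(command).communicate()

# 1. Merge the individual .mc files from the collection run into one MCH.
run([mcs_path, "-merge", base_mch_file, os.path.join(temp_location, "*.mc"), "-recursive"])

# 2. Remove duplicates first (the reordering in this commit), so the replay in the
#    next step never JIT-compiles contexts that would be discarded as duplicates.
run([mcs_path, "-removeDup", "-thin", base_mch_file, nodup_mch_file])

# 3. Replay the de-duplicated MCH and strip out any failing contexts.
run([superpmi_path, "-p", "-f", base_fail_mcl_file, nodup_mch_file, jit_path])
if os.path.isfile(base_fail_mcl_file) and os.stat(base_fail_mcl_file).st_size != 0:
    run([mcs_path, "-strip", base_fail_mcl_file, nodup_mch_file, final_mch_file])
else:
    shutil.copy2(nodup_mch_file, final_mch_file)

# 4. Build a TOC and verify the final MCH replays cleanly with the same JIT.
run([mcs_path, "-toc", final_mch_file])
run([superpmi_path, "-p", "-f", final_fail_mcl_file, final_mch_file, jit_path])

Before this change, step 2 ran after step 3, so the strip pass replayed every duplicate context; swapping the two phases is what saves the JIT-compilation time.
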
2 changes: 1 addition & 1 deletion src/coreclr/scripts/coreclr_arguments.py
@@ -266,7 +266,7 @@ def check_and_return_default_product_location(product_location):
self.verify(args,
"core_root",
check_and_return_default_core_root,
"Error, incorrect core_root location.")
"Error, Core_Root could not be determined, or points to a location that doesn't exist.")

self.verify(args,
"product_location",
188 changes: 103 additions & 85 deletions src/coreclr/scripts/superpmi.py
@@ -96,8 +96,6 @@

collect_parser.add_argument("--use_zapdisable", dest="use_zapdisable", default=False, action="store_true", help="Allow redundant calls to the systems libraries for more coverage.")

collect_parser.add_argument("--assume_unclean_mch", dest="assume_unclean_mch", default=False, action="store_true", help="Force clean the mch file. This is useful if the dataset is large and there are expected dups.")

# Allow for continuing a collection in progress
collect_parser.add_argument("-existing_temp_dir", dest="existing_temp_dir", default=None, nargs="?")
collect_parser.add_argument("--has_run_collection_command", dest="has_run_collection_command", default=False, action="store_true")
@@ -184,6 +182,48 @@
list_parser.add_argument("-build_type", dest="build_type", nargs='?', default="Checked")
list_parser.add_argument("-runtime_repo_root", dest="runtime_repo_root", default=os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), help="Path of the dotnet/runtime repo root directory. Optional.")

################################################################################
# Helper functions
################################################################################

def is_zero_length_file(fpath):
""" Determine if a file system path refers to an existing file that is zero length
Args:
fpath (str) : file system path to test
Returns:
bool : true if the path is an existing file that is zero length
"""
return os.path.isfile(fpath) and os.stat(fpath).st_size == 0

def is_nonzero_length_file(fpath):
""" Determine if a file system path refers to an existing file that is non-zero length
Args:
fpath (str) : file system path to test
Returns:
bool : true if the path is an existing file that is non-zero length
"""
return os.path.isfile(fpath) and os.stat(fpath).st_size != 0

def make_safe_filename(s):
""" Turn a string into a string usable as a single file name component; replace illegal characters with underscores.
Args:
s (str) : string to convert to a file name
Returns:
(str) : The converted string
"""
def safe_char(c):
if c.isalnum():
return c
else:
return "_"
return "".join(safe_char(c) for c in s)

################################################################################
# Helper classes
################################################################################
@@ -372,8 +412,8 @@ def collect(self):
# The base .MCH file path
self.base_mch_file = None

# Clean .MCH file path
self.clean_mch_file = None
# No dup .MCH file path
self.nodup_mch_file = None

# Final .MCH file path
self.final_mch_file = None
@@ -386,15 +426,15 @@ def collect(self):
# Do a basic SuperPMI collect and validation:
# 1. Collect MC files by running a set of sample apps.
# 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive".
# 3. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
# out any failures (if any).
# 4. Create a thin unique MCH by using "mcs -removeDup -thin".
# 5. Create a TOC using "mcs -toc".
# 6. Verify the resulting MCH file is error-free when running SuperPMI against it with the
# same JIT used for collection.
# 3. Create a thin unique MCH by using "mcs -removeDup -thin".
# 4. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
# out any failures (if any).
# 5. Create a TOC using "mcs -toc".
# 6. Verify the resulting MCH file is error-free when running SuperPMI against it with the
# same JIT used for collection.
#
# MCH files are big. If we don't need them anymore, clean them up right away to avoid
# running out of disk space in disk constrained situations.
# MCH files are big. If we don't need them anymore, clean them up right away to avoid
# running out of disk space in disk constrained situations.

passed = False

@@ -405,7 +445,7 @@ def collect(self):
self.final_fail_mcl_file = os.path.join(temp_location, "finalfail.mcl")

self.base_mch_file = os.path.join(temp_location, "base.mch")
self.clean_mch_file = os.path.join(temp_location, "clean.mch")
self.nodup_mch_file = os.path.join(temp_location, "nodup.mch")

self.temp_location = temp_location

@@ -438,8 +478,8 @@ def collect(self):
self.__merge_mch_files__()

if not self.coreclr_args.has_verified_clean_mch:
self.__create_clean_mch_file__()
self.__create_thin_unique_mch__()
self.__create_clean_mch_file__()
self.__create_toc__()
self.__verify_final_mch__()

@@ -517,17 +557,6 @@ def get_all_assemblies(location, root=True):

return assemblies

def make_safe_filename(s):
def safe_char(c):
if c.isalnum():
return c
else:
return "_"
return "".join(safe_char(c) for c in s)

def is_zero_length_file(fpath):
return os.path.isfile(fpath) and os.stat(fpath).st_size == 0

async def run_pmi(print_prefix, assembly, self):
""" Run pmi over all dlls
"""
@@ -575,14 +604,10 @@ async def run_pmi(print_prefix, assembly, self):

os.environ.update(old_env)

contents = os.listdir(self.temp_location)
mc_contents = [os.path.join(self.temp_location, item) for item in contents if ".mc" in item]

if len(mc_contents) == 0:
mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
if len(mc_files) == 0:
raise RuntimeError("No .mc files generated.")

self.mc_contents = mc_contents

def __merge_mc_files__(self):
""" Merge the mc files that were generated
@@ -596,19 +621,16 @@ def __merge_mc_files__(self):
command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive"]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)

proc.communicate()

if not os.path.isfile(self.mcs_path):
raise RuntimeError("mch file failed to be generated at: %s" % self.mcs_path)

contents = os.listdir(self.temp_location)
mc_contents = [os.path.join(self.temp_location, item) for item in contents if ".mc" in item and not ".mch" in item]
if not os.path.isfile(self.base_mch_file):
raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)

# All the individual MC files are no longer necessary, now that we have
# merged them into the base.mch. Delete them.
if not self.coreclr_args.skip_cleanup:
for item in mc_contents:
mc_files = [os.path.join(self.temp_location, item) for item in os.listdir(self.temp_location) if item.endswith(".mc")]
for item in mc_files:
os.remove(item)

def __merge_mch_files__(self):
Expand All @@ -625,67 +647,67 @@ def __merge_mch_files__(self):
command = [self.mcs_path, "-concat", self.base_mch_file, item]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)

proc.communicate()

if not os.path.isfile(self.mcs_path):
raise RuntimeError("mch file failed to be generated at: %s" % self.mcs_path)
if not os.path.isfile(self.base_mch_file):
raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)

def __create_thin_unique_mch__(self):
""" Create a thin unique MCH
Notes:
<mcl> -removeDup -thin <s_baseMchFile> <s_nodupMchFile>
"""

command = [self.mcs_path, "-removeDup", "-thin", self.base_mch_file, self.nodup_mch_file]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()

if not os.path.isfile(self.nodup_mch_file):
raise RuntimeError("Error, no dup mch file not created correctly at: %s" % self.nodup_mch_file)

if not self.coreclr_args.skip_cleanup:
os.remove(self.base_mch_file)
self.base_mch_file = None

def __create_clean_mch_file__(self):
""" Create a clean mch file based on the original
""" Create a clean mch file
Notes:
<SuperPMIPath> -p -f <s_baseFailMclFile> <s_baseMchFile> <jitPath>
<SuperPMIPath> -p -f <s_baseFailMclFile> <s_nodupMchFile> <jitPath>
if <s_baseFailMclFile> is non-empty:
<mcl> -strip <s_baseFailMclFile> <s_baseMchFile> <s_cleanMchFile>
<mcl> -strip <s_baseFailMclFile> <s_nodupMchFile> <s_finalMchFile>
else
# no need to copy, just change the names
clean_mch_file = base_mch_file
# copy/move nodup file to final file
del <s_baseFailMclFile>
"""

command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.base_mch_file, self.jit_path]
print (" ".join(command))
command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.nodup_mch_file, self.jit_path]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)

proc.communicate()

if os.path.isfile(self.base_fail_mcl_file) and os.stat(self.base_fail_mcl_file).st_size != 0:
command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.base_mch_file, self.clean_mch_file]
print (" ".join(command))
if is_nonzero_length_file(self.base_fail_mcl_file):
command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.nodup_mch_file, self.final_mch_file]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)

proc.communicate()
else:
self.clean_mch_file = self.base_mch_file
self.base_mch_file = None
# Ideally we could just rename this file instead of copying it.
shutil.copy2(self.nodup_mch_file, self.final_mch_file)

if not os.path.isfile(self.clean_mch_file):
raise RuntimeError("Clean mch file failed to be generated.")
if not os.path.isfile(self.final_mch_file):
raise RuntimeError("Final mch file failed to be generated.")

if not self.coreclr_args.skip_cleanup:
if os.path.isfile(self.base_fail_mcl_file):
os.remove(self.base_fail_mcl_file)
self.base_fail_mcl_file = None

def __create_thin_unique_mch__(self):
""" Create a thin unique MCH
Notes:
<mcl> -removeDup -thin <s_cleanMchFile> <s_finalMchFile>
"""

command = [self.mcs_path, "-removeDup", "-thin", self.clean_mch_file, self.final_mch_file]
proc = subprocess.Popen(command)
proc.communicate()

if not os.path.isfile(self.final_mch_file):
raise RuntimeError("Error, final mch file not created correctly.")

if not self.coreclr_args.skip_cleanup:
os.remove(self.clean_mch_file)
self.clean_mch_file = None
if os.path.isfile(self.nodup_mch_file):
os.remove(self.nodup_mch_file)
self.nodup_mch_file = None

def __create_toc__(self):
""" Create a TOC file
Expand All @@ -695,11 +717,12 @@ def __create_toc__(self):
"""

command = [self.mcs_path, "-toc", self.final_mch_file]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()

if not os.path.isfile(self.toc_file):
raise RuntimeError("Error, toc file not created correctly.")
raise RuntimeError("Error, toc file not created correctly at: %s" % self.toc_file)

def __verify_final_mch__(self):
""" Verify the resulting MCH file is error-free when running SuperPMI against it with the same JIT used for collection.
Expand All @@ -712,7 +735,7 @@ def __verify_final_mch__(self):
passed = spmi_replay.replay()

if not passed:
raise RuntimeError("Error unclean replay.")
raise RuntimeError("Error, unclean replay.")

################################################################################
# SuperPMI Replay
@@ -821,7 +844,7 @@ def replay(self):
print("Clean SuperPMI Replay")
return_code = True

if os.path.isfile(self.fail_mcl_file) and os.stat(self.fail_mcl_file).st_size != 0:
if is_nonzero_length_file(self.fail_mcl_file):
# Unclean replay.
#
# Save the contents of the fail.mcl file to dig into failures.
@@ -1017,7 +1040,7 @@ def replay_with_asm_diffs(self, previous_temp_location=None):
else:
return_code = 1;

if os.path.isfile(self.fail_mcl_file) and os.stat(self.fail_mcl_file).st_size != 0:
if is_nonzero_length_file(self.fail_mcl_file):
# Unclean replay.
#
# Save the contents of the fail.mcl file to dig into failures.
@@ -1080,7 +1103,7 @@ def replay_with_asm_diffs(self, previous_temp_location=None):
# There were diffs. Go through each method that created diffs and
# create a base/diff asm file with diffable asm. In addition, create
# a standalone .mc for easy iteration.
if os.path.isfile(self.diff_mcl_file) and os.stat(self.diff_mcl_file).st_size != 0 or self.coreclr_args.diff_with_code_only:
if is_nonzero_length_file(self.diff_mcl_file) or self.coreclr_args.diff_with_code_only:
# AsmDiffs.
#
# Save the contents of the fail.mcl file to dig into failures.
@@ -1866,11 +1889,6 @@ def setup_mch_arg(arg):
lambda unused: True,
"Unable to set existing_temp_dir.")

coreclr_args.verify(args,
"assume_unclean_mch",
lambda unused: True,
"Unable to set assume_unclean_mch.")

coreclr_args.verify(args,
"has_run_collection_command",
lambda unused: True,