Skip to content

Commit

Permalink
Speed up SuperPMI mcs remove dup process (dotnet#33946)
Browse files Browse the repository at this point in the history
* Speed up SuperPMI `mcs -removeDup`

Create a "Hash" class that encapsulates the MD5 hashing that is
used to determine if two MCs are equivalent. Primarily, this
allows caching the Windows Crypto provider, which it is very slow
to acquire.

In addition, make some changes to avoid unnecessary memory allocations
and other unnecessary work.

The result is that `mcs -removeDup` is about 4x faster.

Much of the remaining cost is that we read, deserialize the MC,
then reserialize the MC (if unique), and finally destroy the in-memory MC.
There is a lot of memory allocation/deallocation in this process that
could possibly be avoided or improved for this scenario.

* Factor out remove dup code to a separate class

* Add new RemoveDup class

* Add `-dedup` deduplication to `mcs -merge`

Also add `-thin`.

With this,

```
mcs.exe -merge base.mch *.mc -recursive
mcs.exe -removeDup -thin base.mch nodup.mch
```

can be replaced with:

```
mcs.exe -merge -recursive -dedup -thin nodup.mch *.mc
```

The main benefit is avoiding creating a potentially very large base.mch file.
Related, the data being processed only needs to be loaded once.

* Fix Linux build break

* Adjust tools to use new `mcs -merge -dedup -thin` arguments

Adjust superpmi.py script and superpmicollect.cs unit test.

* Update readme documentation for SuperPMI

Add description of `mcs -merge -dedup -thin` arguments and usage.
  • Loading branch information
BruceForstall authored Mar 27, 2020
1 parent 8cfcd8c commit bd30e15
Show file tree
Hide file tree
Showing 22 changed files with 704 additions and 316 deletions.
55 changes: 15 additions & 40 deletions src/coreclr/scripts/superpmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,6 @@ def collect(self):
# The base .MCH file path
self.base_mch_file = None

# No dup .MCH file path
self.nodup_mch_file = None

# Final .MCH file path
self.final_mch_file = None

Expand All @@ -510,12 +507,11 @@ def collect(self):

# Do a basic SuperPMI collect and validation:
# 1. Collect MC files by running a set of sample apps.
# 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive".
# 3. Create a thin unique MCH by using "mcs -removeDup -thin".
# 4. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
# 2. Create a merged thin unique MCH by using "mcs -merge -recursive -dedup -thin base.mch *.mc".
# 3. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter
# out any failures (if any).
# 5. Create a TOC using "mcs -toc".
# 6. Verify the resulting MCH file is error-free when running SuperPMI against it with the
# 4. Create a TOC using "mcs -toc".
# 5. Verify the resulting MCH file is error-free when running SuperPMI against it with the
# same JIT used for collection.
#
# MCH files are big. If we don't need them anymore, clean them up right away to avoid
Expand All @@ -528,7 +524,6 @@ def collect(self):
# Setup all of the temp locations
self.base_fail_mcl_file = os.path.join(temp_location, "basefail.mcl")
self.base_mch_file = os.path.join(temp_location, "base.mch")
self.nodup_mch_file = os.path.join(temp_location, "nodup.mch")

self.temp_location = temp_location

Expand Down Expand Up @@ -563,7 +558,6 @@ def collect(self):
self.__merge_mch_files__()

if not self.coreclr_args.has_verified_clean_mch:
self.__create_thin_unique_mch__()
self.__create_clean_mch_file__()
self.__create_toc__()
self.__verify_final_mch__()
Expand Down Expand Up @@ -694,13 +688,13 @@ def __merge_mc_files__(self):
""" Merge the mc files that were generated
Notes:
mcs -merge <s_baseMchFile> <s_tempDir>\*.mc -recursive
mcs -merge <s_baseMchFile> <s_tempDir>\*.mc -recursive -dedup -thin
"""

pattern = os.path.join(self.temp_location, "*.mc")

command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive"]
command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive", "-dedup", "-thin"]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()
Expand Down Expand Up @@ -732,51 +726,32 @@ def __merge_mch_files__(self):
if not os.path.isfile(self.base_mch_file):
raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file)

def __create_thin_unique_mch__(self):
""" Create a thin unique MCH
Notes:
<mcl> -removeDup -thin <s_baseMchFile> <s_nodupMchFile>
"""

command = [self.mcs_path, "-removeDup", "-thin", self.base_mch_file, self.nodup_mch_file]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()

if not os.path.isfile(self.nodup_mch_file):
raise RuntimeError("Error, no dup mch file not created correctly at: %s" % self.nodup_mch_file)

if not self.coreclr_args.skip_cleanup:
os.remove(self.base_mch_file)
self.base_mch_file = None

def __create_clean_mch_file__(self):
""" Create a clean mch file
Notes:
<SuperPMIPath> -p -f <s_baseFailMclFile> <s_nodupMchFile> <jitPath>
<SuperPMIPath> -p -f <s_baseFailMclFile> <s_baseMchFile> <jitPath>
if <s_baseFailMclFile> is non-empty:
<mcl> -strip <s_baseFailMclFile> <s_nodupMchFile> <s_finalMchFile>
<mcl> -strip <s_baseFailMclFile> <s_baseMchFile> <s_finalMchFile>
else
# copy/move nodup file to final file
# copy/move base file to final file
del <s_baseFailMclFile>
"""

command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.nodup_mch_file, self.jit_path]
command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.base_mch_file, self.jit_path]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()

if is_nonzero_length_file(self.base_fail_mcl_file):
command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.nodup_mch_file, self.final_mch_file]
command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.base_mch_file, self.final_mch_file]
print("Invoking: " + " ".join(command))
proc = subprocess.Popen(command)
proc.communicate()
else:
# Ideally we could just rename this file instead of copying it.
shutil.copy2(self.nodup_mch_file, self.final_mch_file)
shutil.copy2(self.base_mch_file, self.final_mch_file)

if not os.path.isfile(self.final_mch_file):
raise RuntimeError("Final mch file failed to be generated.")
Expand All @@ -785,9 +760,9 @@ def __create_clean_mch_file__(self):
if os.path.isfile(self.base_fail_mcl_file):
os.remove(self.base_fail_mcl_file)
self.base_fail_mcl_file = None
if os.path.isfile(self.nodup_mch_file):
os.remove(self.nodup_mch_file)
self.nodup_mch_file = None
if os.path.isfile(self.base_mch_file):
os.remove(self.base_mch_file)
self.base_mch_file = None

def __create_toc__(self):
""" Create a TOC file
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/ToolBox/superpmi/mcs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include_directories(../superpmi-shared)
set(MCS_SOURCES
commandline.cpp
mcs.cpp
removedup.cpp
verbasmdump.cpp
verbconcat.cpp
verbdump.cpp
Expand All @@ -31,6 +32,7 @@ set(MCS_SOURCES
../superpmi-shared/callutils.cpp
../superpmi-shared/compileresult.cpp
../superpmi-shared/errorhandling.cpp
../superpmi-shared/hash.cpp
../superpmi-shared/logging.cpp
../superpmi-shared/mclist.cpp
../superpmi-shared/methodcontext.cpp
Expand Down
45 changes: 43 additions & 2 deletions src/coreclr/src/ToolBox/superpmi/mcs/commandline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,21 @@ void CommandLine::DumpHelp(const char* program)
printf(" Check the integrity of each methodContext\n");
printf(" e.g. -integ a.mc\n");
printf("\n");
printf(" -merge outputfile pattern\n");
printf(" -merge outputfile pattern [-dedup [-thin]]\n");
printf(" Merge all the input files matching the pattern.\n");
printf(" With -dedup, skip duplicates when copying (like using -removeDup). With -thin, also strip CompileResults.\n");
printf(" e.g. -merge a.mch *.mc\n");
printf(" e.g. -merge a.mch c:\\foo\\bar\\*.mc\n");
printf(" e.g. -merge a.mch relpath\\*.mc\n");
printf(" e.g. -merge a.mch .\n");
printf(" e.g. -merge a.mch onedir\n");
printf(" e.g. -merge a.mch *.mc -dedup -thin\n");
printf("\n");
printf(" -merge outputfile pattern -recursive\n");
printf(" -merge outputfile pattern -recursive [-dedup [-thin]]\n");
printf(" Merge all the input files matching the pattern, in the specified and all child directories.\n");
printf(" With -dedup, skip duplicates when copying (like using -removeDup). With -thin, also strip CompileResults.\n");
printf(" e.g. -merge a.mch *.mc -recursive\n");
printf(" e.g. -merge a.mch *.mc -recursive -dedup -thin\n");
printf("\n");
printf(" -removeDup inputfile outputfile\n");
printf(" Copy methodContexts from inputfile to outputfile, skipping duplicates.\n");
Expand Down Expand Up @@ -246,6 +250,11 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o)
tempLen = strlen(argv[i]);
o->recursive = true;
}
else if ((_strnicmp(&argv[i][1], "dedup", argLen) == 0))
{
tempLen = strlen(argv[i]);
o->dedup = true;
}
else if ((_strnicmp(&argv[i][1], "toc", argLen) == 0))
{
tempLen = strlen(argv[i]);
Expand Down Expand Up @@ -390,6 +399,38 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o)
}
}

if (o->dedup)
{
if (!o->actionMerge)
{
LogError("CommandLine::Parse() '-dedup' requires -merge.");
DumpHelp(argv[0]);
return false;
}
}

if (o->stripCR)
{
if (o->actionMerge)
{
if (!o->dedup)
{
LogError("CommandLine::Parse() '-thin' in '-merge' requires -dedup.");
DumpHelp(argv[0]);
return false;
}
}
else if (o->actionRemoveDup || o->actionStrip || o->actionFracture || o->actionCopy)
{
}
else
{
LogError("CommandLine::Parse() '-thin' requires -merge, -removeDup, -strip, -fracture, or -copy.");
DumpHelp(argv[0]);
return false;
}
}

if (o->actionASMDump)
{
if ((!foundFile1) || (!foundFile2))
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/ToolBox/superpmi/mcs/commandline.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class CommandLine
, actionTOC(false)
, legacyCompare(false)
, recursive(false)
, dedup(false)
, stripCR(false)
, nameOfFile1(nullptr)
, nameOfFile2(nullptr)
Expand All @@ -57,6 +58,7 @@ class CommandLine
bool actionTOC;
bool legacyCompare;
bool recursive;
bool dedup;
bool stripCR;
char* nameOfFile1;
char* nameOfFile2;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/ToolBox/superpmi/mcs/mcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ int __cdecl main(int argc, char* argv[])
}
if (o.actionMerge)
{
exitCode = verbMerge::DoWork(o.nameOfFile1, o.nameOfFile2, o.recursive);
exitCode = verbMerge::DoWork(o.nameOfFile1, o.nameOfFile2, o.recursive, o.dedup, o.stripCR);
}
if (o.actionCopy)
{
Expand Down
Loading

0 comments on commit bd30e15

Please sign in to comment.