[lit] Limit parallelism of sanitizer tests on Darwin [llvm part]

Running lit tests and unit tests of ASan and TSan on macOS has very bad performance when running with a high number of threads. This is caused by xnu (the macOS kernel), which currently doesn't handle mapping and unmapping of sanitizer shadow regions (reserved VM regions that are several terabytes large) very well. The situation is so bad that increasing the number of threads actually makes the total testing time longer. The macOS buildbots are affected by this. Note that we can't easily limit the number of sanitizer testing threads without affecting the rest of the tests. This patch adds a special "group" into lit, and limits the number of concurrently running tests in this group. This helps solve the contention problem while still allowing other tests to run at full parallelism; that is, running lit with -j8 will still run with 8 threads, and parallelism is only limited in sanitizer tests. Differential Revision: https://reviews.llvm.org/D28420 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292231 91177308-0d34-0410-b5e6-96231b3b80d8
file-citas · Jan 17, 2017 · 518607f · 518607f
1 parent a9edfba
commit 518607f
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 4 deletions.
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
@@ -24,7 +24,8 @@ def __init__(self, progname, path, quiet,
                  noExecute, debug, isWindows,
                  params, config_prefix = None,
                  maxIndividualTestTime = 0,
-                 maxFailures = None):
+                 maxFailures = None,
+                 parallelism_groups = []):
         # The name of the test runner.
         self.progname = progname
         # The items to add to the PATH environment variable.
@@ -62,6 +63,7 @@ def __init__(self, progname, path, quiet,
 
         self.maxIndividualTestTime = maxIndividualTestTime
         self.maxFailures = maxFailures
+        self.parallelism_groups = parallelism_groups
 
     @property
     def maxIndividualTestTime(self):

diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
@@ -106,7 +106,7 @@ def __init__(self, parent, name, suffixes, test_format,
                  environment, substitutions, unsupported,
                  test_exec_root, test_source_root, excludes,
                  available_features, pipefail, limit_to_features = [],
-                 is_early = False):
+                 is_early = False, parallelism_group = ""):
         self.parent = parent
         self.name = str(name)
         self.suffixes = set(suffixes)
@@ -125,6 +125,7 @@ def __init__(self, parent, name, suffixes, test_format,
         self.limit_to_features = set(limit_to_features)
         # Whether the suite should be tested early in a given run.
         self.is_early = bool(is_early)
+        self.parallelism_group = parallelism_group
 
     def finish(self, litConfig):
         """finish() - Finish this config object, after loading is complete."""

diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
@@ -313,6 +313,13 @@ def main_with_tmp(builtinParameters):
 
     isWindows = platform.system() == 'Windows'
 
+    parallelism_groups = {}
+    if platform.system() == 'Darwin':
+      # Only run up to 3 64-bit sanitized processes simultaneously on Darwin.
+      # Using more scales badly and hogs the system due to inefficient handling
+      # of large mmap'd regions (terabytes) by the kernel.
+      parallelism_groups["darwin-64bit-sanitizer"] = 3
+
     # Create the global config object.
     litConfig = lit.LitConfig.LitConfig(
         progname = os.path.basename(sys.argv[0]),
@@ -327,7 +334,8 @@ def main_with_tmp(builtinParameters):
         params = userParams,
         config_prefix = opts.configPrefix,
         maxIndividualTestTime = maxIndividualTestTime,
-        maxFailures = opts.maxFailures)
+        maxFailures = opts.maxFailures,
+        parallelism_groups = parallelism_groups)
 
     # Perform test discovery.
     run = lit.run.Run(litConfig,

diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py
@@ -175,9 +175,14 @@ class Run(object):
     def __init__(self, lit_config, tests):
         self.lit_config = lit_config
         self.tests = tests
+        self.parallelism_semaphores = ""
 
     def execute_test(self, test):
+        pg = test.config.parallelism_group
+        if callable(pg): pg = pg(test)
+
         result = None
+        if pg: self.parallelism_semaphores[pg].acquire()
         start_time = time.time()
         try:
             result = test.config.test_format.execute(test, self.lit_config)
@@ -189,6 +194,8 @@ def execute_test(self, test):
                 result = lit.Test.Result(code, output)
             elif not isinstance(result, lit.Test.Result):
                 raise ValueError("unexpected result from test execution")
+
+            result.elapsed = time.time() - start_time
         except KeyboardInterrupt:
             raise
         except:
@@ -198,7 +205,8 @@ def execute_test(self, test):
             output += traceback.format_exc()
             output += '\n'
             result = lit.Test.Result(lit.Test.UNRESOLVED, output)
-        result.elapsed = time.time() - start_time
+        finally:
+            if pg: self.parallelism_semaphores[pg].release()
 
         test.setResult(result)
 
@@ -231,6 +239,7 @@ def execute_tests(self, display, jobs, max_time=None,
             try:
                 task_impl = multiprocessing.Process
                 queue_impl = multiprocessing.Queue
+                sem_impl = multiprocessing.Semaphore
                 canceled_flag =  multiprocessing.Value('i', 0)
                 consumer = MultiprocessResultsConsumer(self, display, jobs)
             except:
@@ -242,9 +251,13 @@ def execute_tests(self, display, jobs, max_time=None,
         if not consumer:
             task_impl = threading.Thread
             queue_impl = queue.Queue
+            sem_impl = threading.Semaphore
             canceled_flag = LockedValue(0)
             consumer = ThreadResultsConsumer(display)
 
+        self.parallelism_semaphores = {k: sem_impl(v)
+            for k, v in self.lit_config.parallelism_groups.items()}
+
         # Create the test provider.
         provider = TestProvider(queue_impl, canceled_flag)
         handleFailures(provider, consumer, self.lit_config.maxFailures)