Skip to content

Commit

Permalink
HADOOP-12632: Enable Dr. Elephant's mapper spill heuristics
Browse files Browse the repository at this point in the history
  • Loading branch information
Fangshi Li committed Jul 13, 2015
1 parent b8b15de commit f37675b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ public HeuristicResult apply(MapReduceApplicationData data) {

HeuristicResult result = new HeuristicResult(HEURISTIC_NAME, severity);

result.addDetail("Number of spilled records ", Long.toString(totalSpills));
result.addDetail("Number of Mapper output records ", Long.toString(totalOutputRecords));
result.addDetail("Ratio of spilled records to mapper output records", Double.toString(ratioSpills));
result.addDetail("Number of tasks", Integer.toString(tasks.length));
result.addDetail("Avg spilled records per task", Long.toString(totalSpills/tasks.length));
result.addDetail("Avg output records per task", Long.toString(totalOutputRecords/tasks.length));
result.addDetail("Ratio of spilled records to output records", Double.toString(ratioSpills));

return result;

Expand Down Expand Up @@ -68,8 +69,8 @@ public static Severity getSpillSeverity(double ratioSpills) {
long normalizedSpillRatio = 0;
//Normalize the ratio to integer.
normalizedSpillRatio = (long) (ratioSpills * THRESHOLD_SPILL_FACTOR);
return Severity.getSeverityAscending(normalizedSpillRatio, (long) (1.25 * THRESHOLD_SPILL_FACTOR),
(long) (1.5 * THRESHOLD_SPILL_FACTOR), (long) (1.75 * THRESHOLD_SPILL_FACTOR),
(long) (2 * THRESHOLD_SPILL_FACTOR));
return Severity.getSeverityAscending(normalizedSpillRatio, (long) (1.9 * THRESHOLD_SPILL_FACTOR),
(long) (1.95 * THRESHOLD_SPILL_FACTOR), (long) (2 * THRESHOLD_SPILL_FACTOR),
(long) (3 * THRESHOLD_SPILL_FACTOR));
}
}
9 changes: 5 additions & 4 deletions app/views/helpMapperSpill.scala.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<p>
This analysis shows the amount of spill that occurs on mapper side in your code<br>
This heuristic gauges your mapper performance from a disk I/O perspective. The mapper spill ratio (spilled records/output records) is a critical indicator of mapper performance: if the ratio is close to 2, it generally means your mappers produce large outputs that cannot fit in the in-memory sort buffer. If the ratio is higher than 2, the situation is even worse. Large disk I/O wasted on sorting output records can seriously slow down your mappers. To make them run faster, try our tentative recommendations. We have newly enabled this heuristic and are still testing it! <br>
</p>
<p>

Expand Down Expand Up @@ -31,10 +31,11 @@ <h4 class="list-group-item-heading">Mapper Spill</h4>
</p>
<h3>Suggestions</h3>
<p>
This heuristic is less straightforward than others, and it requires deeper Hadoop knowledge. We are still working on finalizing the recommendations. Feedback is welcome! You could try:
<ol>
<li> Increase the size of in-memory sort buffer (mapreduce.task.io.sort.mb), default 100M</li>
<li> Increase the buffer spill percentage (mapreduce.map.sort.spill.percent, when it is reached a background thread will start spill buffer to disk), default value is 0.8.</li>
<li> Use combiner to lower the map output size. </li>
<li> Increase the size of map side sort buffer (io.sort.mb)</li>
<li> Increase the size of index buffer (io.sort.record.percent) </li>
<li> Increase the spill percentage (io.sort.spill.percent), default value is 0.8.</li>
<li> Compress mapper output (set mapreduce.map.output.compress and mapreduce.map.output.compress.codec)</li>
</ol>
</p>
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,19 @@ public class MapperSpillHeuristicTest extends TestCase {
private static final int numTasks = 100;

public void testCritical() throws IOException {
  // Spill ratio 3.0 (3000/1000) reaches the CRITICAL threshold.
  // The stale pre-change assertion (2200/1000) from the merged diff view is dropped:
  // under the new thresholds a 2.2 ratio is no longer CRITICAL.
  assertEquals(Severity.CRITICAL, analyzeJob(3000, 1000));
}

public void testSevere() throws IOException {
  // Spill ratio 2.0 (2000/1000) falls in the SEVERE band [2.0, 3.0).
  // The stale pre-change assertion (1755/1000) from the merged diff view is dropped.
  assertEquals(Severity.SEVERE, analyzeJob(2000, 1000));
}

public void testModerate() throws IOException {
  // Spill ratio 1.98 (1980/1000) falls in the MODERATE band [1.95, 2.0).
  // The stale pre-change assertion (1555/1000) from the merged diff view is dropped.
  assertEquals(Severity.MODERATE, analyzeJob(1980, 1000));
}

public void testLow() throws IOException {
  // Spill ratio 1.9 (1900/1000) falls in the LOW band [1.9, 1.95).
  // The stale pre-change assertion (1352/1000) from the merged diff view is dropped.
  assertEquals(Severity.LOW, analyzeJob(1900, 1000));
}

public void testNone() throws IOException {
Expand All @@ -44,7 +44,7 @@ private Severity analyzeJob(long spilledRecords, long mapRecords) throws IOExcep
counter.set(MapReduceCounterHolder.CounterName.MAP_OUTPUT_RECORDS, mapRecords);

for (int i=0; i < numTasks; i++) {
mappers[i] = new MapReduceTaskData(counter, new long[] { 0, 5, 5, 5 });
mappers[i] = new MapReduceTaskData(counter, new long[4]);
}

MapReduceApplicationData data = new MapReduceApplicationData().setCounters(jobCounter).setMapperData(mappers);
Expand Down

0 comments on commit f37675b

Please sign in to comment.