Skip to content

Commit

Permalink
LIHADOOP-16576 Make severity threshold values configurable for the heuristics.
Browse files Browse the repository at this point in the history

RB=635070

G=superfriends-reviewers
R=annag,fli,rratti,shanm,viramach
A=fli
  • Loading branch information
akshayrai committed Jan 29, 2016
1 parent 4f83bec commit 474f61b
Show file tree
Hide file tree
Showing 49 changed files with 1,361 additions and 194 deletions.
27 changes: 20 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,29 @@ cd /export/apps/elephant/
* **classname**: Fully qualified name of the class.
* **viewname**: Fully qualified name of the view.
* **hadoopversions**: Versions of Hadoop with which the heuristic is compatible.
* Optionally, if you wish to override the severity threshold values used by a heuristic with custom limits,
you can specify them in HeuristicConf.xml inside the `<params>` element. See the examples below.
* A sample entry in HeuristicConf.xml looks like this:
```
<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Job Queue Limit</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.JobQueueLimitHeuristic</classname>
<viewname>views.html.helpJobQueueLimit</viewname>
<hadoopversions>
<version>1</version>
</hadoopversions>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Job Queue Limit</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.JobQueueLimitHeuristic</classname>
<viewname>views.html.helpJobQueueLimit</viewname>
</heuristic>
```
* A sample entry showing how to override/configure severity thresholds looks like this:
```
<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper Data Skew</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperDataSkewHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperDataSkew</viewname>
<params>
<num_tasks_severity>10, 50, 100, 200</num_tasks_severity>
<deviation_severity>2, 4, 8, 16</deviation_severity>
<files_severity>1/8, 1/4, 1/2, 1</files_severity>
</params>
</heuristic>
```
* Run Doctor Elephant; it should now include the new heuristics.
5 changes: 5 additions & 0 deletions app-conf/FetcherConf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,10 @@
<fetcher>
<applicationtype>spark</applicationtype>
<classname>org.apache.spark.deploy.history.SparkFSFetcher</classname>
<!--
<params>
<event_log_size_limit_in_mb>100</event_log_size_limit_in_mb>
</params>
-->
</fetcher>
</fetchers>
112 changes: 99 additions & 13 deletions app-conf/HeuristicConf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,134 @@
License for the specific language governing permissions and limitations under
the License.
-->
<!-- Heuristics configurations, each heuristic will be loaded by a particular analyser.
-->
<!-- Heuristics configurations, each heuristic will be loaded by a particular analyser -->
<heuristics>

<!-- MAP-REDUCE HEURISTICS -->

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper Data Skew</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperDataSkewHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperDataSkew</viewname>
<!--<params>
<num_tasks_severity>10, 50, 100, 200</num_tasks_severity>
<deviation_severity>2, 4, 8, 16</deviation_severity>
<files_severity>1/8, 1/4, 1/2, 1</files_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper GC</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperGCHeuristic</classname>
<viewname>views.html.help.mapreduce.helpGC</viewname>
<!--<params>
<gc_ratio_severity>0.01, 0.02, 0.03, 0.04</gc_ratio_severity>
<runtime_severity_in_min>5, 10, 12, 15</runtime_severity_in_min>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper Time</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperTimeHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperTime</viewname>
<!--<params>
<short_runtime_severity_in_min>10, 4, 2, 1</short_runtime_severity_in_min>
<long_runtime_severity_in_min>15, 30, 60, 120</long_runtime_severity_in_min>
<num_tasks_severity>50, 101, 500, 1000</num_tasks_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper Speed</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperSpeedHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperSpeed</viewname>
<!--<params>
<disk_speed_severity>1/2, 1/4, 1/8, 1/32</disk_speed_severity>
<runtime_severity_in_min>5, 10, 15, 30</runtime_severity_in_min>
</params>-->
</heuristic>

<!--
<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper Spill</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperSpillHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperSpill</viewname>
<params>
<spill_severity>2.01, 2.2, 2.5, 3</spill_severity>
<num_tasks_severity>50, 100, 500, 1000</num_tasks_severity>
</params>
</heuristic>
-->

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Mapper GC</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperGCHeuristic</classname>
<viewname>views.html.help.mapreduce.helpGC</viewname>
<heuristicname>Mapper Memory</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.MapperMemoryHeuristic</classname>
<viewname>views.html.help.mapreduce.helpMapperMemory</viewname>
<!--<params>
<container_memory_severity>1, 1.5, 2, 2.5</container_memory_severity>
<memory_ratio_severity>0.6, 0.5, 0.4, 0.3</memory_ratio_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Reducer Data Skew</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ReducerDataSkewHeuristic</classname>
<viewname>views.html.help.mapreduce.helpReducerDataSkew</viewname>
<!--<params>
<num_tasks_severity>10, 50, 100, 200</num_tasks_severity>
<deviation_severity>2, 4, 8, 16</deviation_severity>
<files_severity>1/8, 1/4, 1/2, 1</files_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Reducer GC</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ReducerGCHeuristic</classname>
<viewname>views.html.help.mapreduce.helpGC</viewname>
<!--<params>
<gc_ratio_severity>0.01, 0.02, 0.03, 0.04</gc_ratio_severity>
<runtime_severity_in_min>5, 10, 12, 15</runtime_severity_in_min>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Reducer Time</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ReducerTimeHeuristic</classname>
<viewname>views.html.help.mapreduce.helpReducerTime</viewname>
<!--<params>
<short_runtime_severity_in_min>10, 4, 2, 1</short_runtime_severity_in_min>
<long_runtime_severity_in_min>15, 30, 60, 120</long_runtime_severity_in_min>
<num_tasks_severity>50, 101, 500, 1000</num_tasks_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Shuffle &#38; Sort</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ShuffleSortHeuristic</classname>
<viewname>views.html.help.mapreduce.helpShuffleSort</viewname>
<heuristicname>Reducer Memory</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ReducerMemoryHeuristic</classname>
<viewname>views.html.help.mapreduce.helpReducerMemory</viewname>
<!--<params>
<container_memory_severity>1, 1.5, 2, 2.5</container_memory_severity>
<memory_ratio_severity>0.6, 0.5, 0.4, 0.3</memory_ratio_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>mapreduce</applicationtype>
<heuristicname>Reducer GC</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ReducerGCHeuristic</classname>
<viewname>views.html.help.mapreduce.helpGC</viewname>
<heuristicname>Shuffle &#38; Sort</heuristicname>
<classname>com.linkedin.drelephant.mapreduce.heuristics.ShuffleSortHeuristic</classname>
<viewname>views.html.help.mapreduce.helpShuffleSort</viewname>
<!--<params>
<runtime_severity_in_min>1, 5, 10, 30</runtime_severity_in_min>
<runtime_ratio_severity>1, 2, 4, 8</runtime_ratio_severity>
</params>-->
</heuristic>

<heuristic>
Expand All @@ -89,39 +151,63 @@
<viewname>views.html.help.mapreduce.helpException</viewname>
</heuristic>


<!-- SPARK HEURISTICS -->

<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark Configuration Best Practice</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.BestPropertiesConventionHeuristic</classname>
<viewname>views.html.help.spark.helpBestProperties</viewname>
<!--<params>
<driver_memory_severity_in_gb>4, 4, 8, 8</driver_memory_severity_in_gb>
<num_core_severity>2</num_core_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark Memory Limit</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.MemoryLimitHeuristic</classname>
<viewname>views.html.help.spark.helpMemoryLimit</viewname>
<!--<params>
<total_mem_severity_in_tb>0.5, 1, 1.5, 2</total_mem_severity_in_tb>
<mem_util_severity>0.8, 0.6, 0.4, 0.2</mem_util_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark Stage Runtime</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.StageRuntimeHeuristic</classname>
<viewname>views.html.help.spark.helpStageRuntime</viewname>
<!--<params>
<stage_runtime_severity_in_min>15, 30, 60, 60</stage_runtime_severity_in_min>
<stage_failure_rate_severity>0.3, 0.3, 0.5, 0.5</stage_failure_rate_severity>
<single_stage_tasks_failure_rate_severity>0.0, 0.3, 0.5, 0.5</single_stage_tasks_failure_rate_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark Job Runtime</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.JobRuntimeHeuristic</classname>
<viewname>views.html.help.spark.helpJobRuntime</viewname>
<!--<params>
<avg_job_failure_rate_severity>0.1, 0.3, 0.5, 0.5</avg_job_failure_rate_severity>
<single_job_failure_rate_severity>0.0, 0.3, 0.5, 0.5</single_job_failure_rate_severity>
</params>-->
</heuristic>

<heuristic>
<applicationtype>spark</applicationtype>
<heuristicname>Spark Executor Load Balance</heuristicname>
<classname>com.linkedin.drelephant.spark.heuristics.ExecutorLoadHeuristic</classname>
<viewname>views.html.help.spark.helpExecutorLoad</viewname>
<!--<params>
<looser_metric_deviation_severity>0.8, 1, 1.2, 1.4</looser_metric_deviation_severity>
<metric_deviation_severity>0.4, 0.6, 0.8, 1.0</metric_deviation_severity>
</params>-->
</heuristic>

<heuristic>
Expand Down
Loading

0 comments on commit 474f61b

Please sign in to comment.