TUNING Updating default MR fetcher for performance (linkedin#300)

ajklili · Nov 10, 2017 · e756226 · e756226
1 parent 9c8915c
commit e756226
Showing 1 changed file with 7 additions and 5 deletions.
diff --git a/app-conf/FetcherConf.xml b/app-conf/FetcherConf.xml
@@ -29,27 +29,29 @@
   </fetcher>
 -->
 <fetchers>
+  <!--
   <fetcher>
     <applicationtype>mapreduce</applicationtype>
     <classname>com.linkedin.drelephant.mapreduce.fetchers.MapReduceFetcherHadoop2</classname>
     <params>
       <sampling_enabled>false</sampling_enabled>
     </params>
   </fetcher>
+  -->
   <!--
-     This is an experimental replacement for the MapReduceFetcherHadoop2 that attempts to burn
+     This is a replacement for the MapReduceFetcherHadoop2 that attempts to burn
      through queues of jobs faster by pulling data directly from HDFS rather than going through
      the job history server.
 
      Increasing the param history_log_size_limit_in_mb allows this fetcher to accept larger log
      files, but also increases the risk of an OutOfMemory error. The default heap size of Dr. Elephant
-     is 1024MB. To increase this, e.g. to 2048MB, run this before start.sh:
-       export OPTS="-mem 2048"
+     is 1024MB. To increase this, e.g. to 2048MB, set jvm_args in app-conf/elephant.conf as follows:
+       jvm_args="-mem 2048"
 
      To work properly, this fetcher should use the same timezone as the job history server.
      If not set, the local timezone will be used.
    -->
-  <!--
+
   <fetcher>
     <applicationtype>mapreduce</applicationtype>
     <classname>com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2</classname>
@@ -59,7 +61,7 @@
       <history_server_time_zone>PST</history_server_time_zone>
     </params>
   </fetcher>
-  -->
+
 
   <!--
     FSFetcher for Spark. Loads the eventlog from HDFS and replays to get the metrics and application properties