forked from linkedin/dr-elephant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
FetcherConf.xml
73 lines (64 loc) · 2.66 KB
/
FetcherConf.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2016 LinkedIn Corp.
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.
-->
<!-- Data fetchers configurations
A Fetcher implements ElephantFetcher interface and help fetch a certain application type data.
Example:
<fetcher>
# Choose the application type that this fetcher is for
<applicationtype>mapreduce</applicationtype>
# Specify the implementation class
<classname>com.linkedin.drelephant.mapreduce.fetchers.MapReduceFetcherHadoop2</classname>
</fetcher>
-->
<fetchers>
<fetcher>
<applicationtype>mapreduce</applicationtype>
<classname>com.linkedin.drelephant.mapreduce.fetchers.MapReduceFetcherHadoop2</classname>
<params>
<sampling_enabled>false</sampling_enabled>
</params>
</fetcher>
<!--
This is an experimental replacement for the MapReduceFetcherHadoop2 that attempts to burn
through queues of jobs faster by pulling data directly from HDFS rather than going through
the job history server.
Increasing the param history_log_size_limit_in_mb allows this fetcher to accept larger log
files, but also increase the risk of OutOfMemory error. The default heap size of Dr. Elephant
is 1024MB. To increase this, e.g. to 2048MB, run this before start.sh:
export OPTS="-mem 2048"
-->
<!--
<fetcher>
<applicationtype>mapreduce</applicationtype>
<classname>com.linkedin.drelephant.mapreduce.fetchers.MapReduceFSFetcherHadoop2</classname>
<params>
<sampling_enabled>false</sampling_enabled>
<history_log_size_limit_in_mb>500</history_log_size_limit_in_mb>
</params>
</fetcher>
-->
<fetcher>
<applicationtype>spark</applicationtype>
<classname>org.apache.spark.deploy.history.SparkFSFetcher</classname>
<!--
<params>
<event_log_size_limit_in_mb>100</event_log_size_limit_in_mb>
<event_log_dir>/system/spark-history</event_log_dir>
<spark_log_ext>_1.snappy</spark_log_ext>
#the values specified in namenode_addresses will be used for obtaining spark logs. The cluster configuration will be ignored.
<namenode_addresses>address1,address2</namenode_addresses>
</params>
-->
</fetcher>
</fetchers>