From 33a5d996c6a3cc961824668edd908cebd05164aa Mon Sep 17 00:00:00 2001
From: lishid
Date: Tue, 15 Apr 2014 12:27:16 -0700
Subject: [PATCH] Minor changes to the help pages, adding Hadoop security to
 work with a keytab
---
README.md | 21 ++--
app/com/linkedin/drelephant/DrElephant.java | 1 -
.../linkedin/drelephant/ElephantAnalyser.java | 13 +++
.../linkedin/drelephant/ElephantFetcher.java | 54 +++++++++-
.../linkedin/drelephant/ElephantRunner.java | 100 ++++++++++--------
.../drelephant/analysis/Heuristic.java | 1 +
.../drelephant/analysis/HeuristicResult.java | 3 +-
.../drelephant/analysis/Severity.java | 10 +-
.../heuristics/ShuffleSortHeuristic.java | 25 +----
.../drelephant/hadoop/HadoopJobData.java | 9 +-
.../drelephant/hadoop/HadoopSecurity.java | 60 +++++++++++
.../drelephant/hadoop/HadoopTaskData.java | 59 ++++++-----
.../linkedin/drelephant/math/Statistics.java | 28 ++++-
.../drelephant/notifications/EmailThread.java | 6 ++
app/model/JobResult.java | 3 +
app/views/helpMapperDataSkew.scala.html | 1 +
app/views/helpMapperInputSize.scala.html | 6 +-
app/views/helpMapperSpeed.scala.html | 2 +-
app/views/helpReducerDataSkew.scala.html | 5 +-
app/views/helpReducerTime.scala.html | 10 +-
app/views/helpShuffleSort.scala.html | 4 +-
app/views/multijob.scala.html | 4 +-
app/views/singlejob.scala.html | 4 +-
conf/application.conf | 6 +-
24 files changed, 308 insertions(+), 127 deletions(-)
create mode 100644 app/com/linkedin/drelephant/hadoop/HadoopSecurity.java
diff --git a/README.md b/README.md
index 4d04a5cc4..af3c3a697 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,20 @@
## Dr Elephant
-### Compiling
+### Compiling & testing locally
-* This project uses Ant to compile and package the jar.
-* Running ant in the project directory will package the jar file ready for use.
+* To be able to build & run the application, download and install [Play framework 2.2.2](http://downloads.typesafe.com/play/2.2.2/play-2.2.2.zip).
+* To build and run the application in dev mode, run "play run" from the command line in the project directory.
+* We still need to investigate how to add parameters to the classpath when running in dev mode.
-### Running
+### Deployment
-* To run the project, copy the package "dr-elephant.jar" to a machine that has hadoop setup.
-* Use the command 'hadoop jar dr-elephant.jar job_id' to start execution.
\ No newline at end of file
+* To create a deployment package, use "play dist" to create a zip package, or use "play universal:package-zip-tarball" to create a tarball.
+* To run the deployed package with Hadoop properly, a few changes need to be made to the startup script located at ./bin/dr-elephant:
+
+* In the classpath line ("declare -r app\_classpath=..."), append the following to the end of the string, just before the closing quote:
+
+ :$HADOOP_HOME/*:$HADOOP_HOME/lib/*:$HADOOP_HOME/conf
+
+* After the next line ("addJava ... ;"), add a new line:
+
+ addJava "-Djava.library.path=$HADOOP_HOME/lib/native/Linux-amd64-64"
\ No newline at end of file
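For illustration only, assuming $HADOOP_HOME points at the Hadoop install on the target machine (the rest of the generated classpath differs per build, so the placeholder below is not literal), the two edited spots in ./bin/dr-elephant end up looking roughly like:

    declare -r app_classpath="<jars packaged by play>:$HADOOP_HOME/*:$HADOOP_HOME/lib/*:$HADOOP_HOME/conf"

    addJava "-Djava.library.path=$HADOOP_HOME/lib/native/Linux-amd64-64"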
diff --git a/app/com/linkedin/drelephant/DrElephant.java b/app/com/linkedin/drelephant/DrElephant.java
index d67edf271..8ac895cf7 100644
--- a/app/com/linkedin/drelephant/DrElephant.java
+++ b/app/com/linkedin/drelephant/DrElephant.java
@@ -23,7 +23,6 @@ public static void analyse(String jobId) throws IOException {
HadoopJobData jobData = fetcher.getJobData(job_id);
ElephantAnalyser analyser = new ElephantAnalyser();
HeuristicResult[] result = analyser.analyse(jobData);
- //System.out.println(result);
}
public DrElephant() throws IOException {
diff --git a/app/com/linkedin/drelephant/ElephantAnalyser.java b/app/com/linkedin/drelephant/ElephantAnalyser.java
index aac90072c..8462eedcc 100644
--- a/app/com/linkedin/drelephant/ElephantAnalyser.java
+++ b/app/com/linkedin/drelephant/ElephantAnalyser.java
@@ -48,6 +48,19 @@ public HeuristicResult[] analyse(HadoopJobData data) {
return results.toArray(new HeuristicResult[results.size()]);
}
+ public String getJobType(HadoopJobData data) {
+ String pigVersion = data.getJobConf().getProperty("pig.version");
+ if (pigVersion != null && !pigVersion.isEmpty()) {
+ return "Pig";
+ }
+ String hiveMapredMode = data.getJobConf().getProperty("hive.mapred.mode");
+ if (hiveMapredMode != null && !hiveMapredMode.isEmpty()) {
+ return "Hive";
+ }
+
+ return "Unknown/Hadoop";
+ }
+
public static ElephantAnalyser instance() {
return instance;
}
diff --git a/app/com/linkedin/drelephant/ElephantFetcher.java b/app/com/linkedin/drelephant/ElephantFetcher.java
index e5066a4d6..d2a557768 100644
--- a/app/com/linkedin/drelephant/ElephantFetcher.java
+++ b/app/com/linkedin/drelephant/ElephantFetcher.java
@@ -3,11 +3,21 @@
import com.linkedin.drelephant.hadoop.HadoopCounterHolder;
import com.linkedin.drelephant.hadoop.HadoopJobData;
import com.linkedin.drelephant.hadoop.HadoopTaskData;
+import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.security.authentication.client.AuthenticatedURL;
+import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.log4j.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.Properties;
public class ElephantFetcher {
private static final Logger logger = Logger.getLogger(ElephantFetcher.class);
@@ -49,7 +59,18 @@ public HadoopJobData getJobData(JobID job_id) throws IOException {
reducers[i] = new HadoopTaskData(job, reducerTasks[i], true);
}
- return new HadoopJobData(counterHolder, mappers, reducers)
+ Properties jobConf = null;
+ try {
+ jobConf = getJobConf(job);
+ } catch (AuthenticationException e) {
+ e.printStackTrace();
+ }
+
+ if (jobConf == null) {
+ jobConf = new Properties();
+ }
+
+ return new HadoopJobData(counterHolder, mappers, reducers, jobConf)
.setUsername(username).setStartTime(startTime).setUrl(jobUrl).setJobName(jobName);
}
@@ -68,4 +89,35 @@ private TaskReport[] getReduceTaskReports(JobID job_id) throws IOException {
public JobStatus[] getJobList() throws IOException {
return jobClient.getAllJobs();
}
+
+ public Properties getJobConf(RunningJob job) throws IOException, AuthenticationException {
+ Properties properties = new Properties();
+ String jobconfUrl = getJobconfUrl(job);
+ if (jobconfUrl == null) {
+ return properties;
+ }
+
+ URL url = new URL(jobconfUrl);
+ AuthenticatedURL.Token token = new AuthenticatedURL.Token();
+ HttpURLConnection conn = new AuthenticatedURL().openConnection(url, token);
+ String data = IOUtils.toString(conn.getInputStream());
+ Document doc = Jsoup.parse(data);
+ Elements rows = doc.select("table").select("tr");
+ for (int i = 1; i < rows.size(); i++) {
+ Element row = rows.get(i);
+ Elements cells = row.select("> td");
+ if (cells.size() == 2) {
+ String key = cells.get(0).text().trim();
+ String value = cells.get(1).text().trim();
+ properties.put(key, value);
+ }
+ }
+ return properties;
+ }
+
+ private String getJobconfUrl(RunningJob job) {
+ String jobDetails = job.getTrackingURL();
+ String root = jobDetails.substring(0, jobDetails.indexOf("jobdetails.jsp"));
+ return root + "jobconf.jsp?jobid=" + job.getID().toString();
+ }
}
diff --git a/app/com/linkedin/drelephant/ElephantRunner.java b/app/com/linkedin/drelephant/ElephantRunner.java
index 9f2d2f5c7..c5cc3579b 100644
--- a/app/com/linkedin/drelephant/ElephantRunner.java
+++ b/app/com/linkedin/drelephant/ElephantRunner.java
@@ -4,6 +4,7 @@
import com.linkedin.drelephant.analysis.HeuristicResult;
import com.linkedin.drelephant.analysis.Severity;
import com.linkedin.drelephant.hadoop.HadoopJobData;
+import com.linkedin.drelephant.hadoop.HadoopSecurity;
import com.linkedin.drelephant.notifications.EmailThread;
import model.JobHeuristicResult;
import model.JobResult;
@@ -11,6 +12,7 @@
import org.apache.hadoop.mapred.JobStatus;
import org.apache.log4j.Logger;
+import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
@@ -22,62 +24,71 @@ public class ElephantRunner implements Runnable {
private AtomicBoolean running = new AtomicBoolean(true);
private EmailThread emailer = new EmailThread();
private boolean firstRun = true;
+ private HadoopSecurity hadoopSecurity;
@Override
public void run() {
- Constants.load();
- emailer.start();
- try {
- ElephantFetcher fetcher = new ElephantFetcher();
-      Set<JobID> previousJobs = new HashSet<JobID>();
- long lastRun;
-
- while (running.get()) {
- lastRun = System.currentTimeMillis();
-
+ hadoopSecurity = new HadoopSecurity();
+    hadoopSecurity.doAs(new PrivilegedAction<Void>() {
+ @Override
+ public Void run() {
+ Constants.load();
+ emailer.start();
try {
- logger.info("Fetching job list.");
- JobStatus[] jobs = fetcher.getJobList();
- if (jobs == null) {
- throw new IllegalArgumentException("Jobtracker returned 'null' for job list");
- }
-
-          Set<JobID> successJobs = filterSuccessfulJobs(jobs);
+ ElephantFetcher fetcher = new ElephantFetcher();
+        Set<JobID> previousJobs = new HashSet<JobID>();
+ long lastRun;
- successJobs = filterPreviousJobs(successJobs, previousJobs);
+ while (running.get()) {
+ lastRun = System.currentTimeMillis();
- logger.info(successJobs.size() + " jobs to analyse.");
-
- //Analyse all ready jobs
- for (JobID jobId : successJobs) {
try {
- analyzeJob(fetcher, jobId);
- previousJobs.add(jobId);
+ logger.info("Fetching job list.");
+ hadoopSecurity.checkLogin();
+ JobStatus[] jobs = fetcher.getJobList();
+ if (jobs == null) {
+ throw new IllegalArgumentException("Jobtracker returned 'null' for job list");
+ }
+
+          Set<JobID> successJobs = filterSuccessfulJobs(jobs);
+
+ successJobs = filterPreviousJobs(successJobs, previousJobs);
+
+ logger.info(successJobs.size() + " jobs to analyse.");
+
+ //Analyse all ready jobs
+ for (JobID jobId : successJobs) {
+ try {
+ analyzeJob(fetcher, jobId);
+ previousJobs.add(jobId);
+ } catch (Exception e) {
+ logger.error("Error analysing job", e);
+ }
+ }
+ logger.info("Finished all jobs. Waiting for refresh.");
+
} catch (Exception e) {
- logger.error("Error analysing job", e);
+ logger.error("Error getting job list", e);
}
- }
- logger.info("Finished all jobs. Waiting for refresh.");
- } catch (Exception e) {
- logger.error("Error getting job list", e);
- }
-
- //Wait for long enough
- long nextRun = lastRun + WAIT_INTERVAL;
- long waitTime = nextRun - System.currentTimeMillis();
- while (running.get() && waitTime > 0) {
- try {
- Thread.sleep(waitTime);
- } catch (InterruptedException e) {
- e.printStackTrace();
+ //Wait for long enough
+ long nextRun = lastRun + WAIT_INTERVAL;
+ long waitTime = nextRun - System.currentTimeMillis();
+ while (running.get() && waitTime > 0) {
+ try {
+ Thread.sleep(waitTime);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ waitTime = nextRun - System.currentTimeMillis();
+ }
}
- waitTime = nextRun - System.currentTimeMillis();
+ } catch (Exception e) {
+ logger.error("Error in ElephantRunner", e);
}
+ return null;
}
- } catch (Exception e) {
- logger.error("Error in ElephantRunner", e);
- }
+ });
}
  private Set<JobID> filterSuccessfulJobs(JobStatus[] jobs) {
@@ -162,6 +173,9 @@ private void analyzeJob(ElephantFetcher fetcher, JobID jobId) throws Exception {
result.save();
+ //TODO: Save this
+ logger.info("Job Type: " + ElephantAnalyser.instance().getJobType(jobData));
+
emailer.enqueue(result);
}
diff --git a/app/com/linkedin/drelephant/analysis/Heuristic.java b/app/com/linkedin/drelephant/analysis/Heuristic.java
index 542a95a7f..073e90171 100644
--- a/app/com/linkedin/drelephant/analysis/Heuristic.java
+++ b/app/com/linkedin/drelephant/analysis/Heuristic.java
@@ -4,5 +4,6 @@
public interface Heuristic {
public HeuristicResult apply(HadoopJobData data);
+
public String getHeuristicName();
}
diff --git a/app/com/linkedin/drelephant/analysis/HeuristicResult.java b/app/com/linkedin/drelephant/analysis/HeuristicResult.java
index e61560e1f..b039a8f9f 100644
--- a/app/com/linkedin/drelephant/analysis/HeuristicResult.java
+++ b/app/com/linkedin/drelephant/analysis/HeuristicResult.java
@@ -1,6 +1,7 @@
package com.linkedin.drelephant.analysis;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
public class HeuristicResult {
private String analysis;
diff --git a/app/com/linkedin/drelephant/analysis/Severity.java b/app/com/linkedin/drelephant/analysis/Severity.java
index caa990d9a..ba0a1635e 100644
--- a/app/com/linkedin/drelephant/analysis/Severity.java
+++ b/app/com/linkedin/drelephant/analysis/Severity.java
@@ -20,12 +20,12 @@ public enum Severity {
private int value;
private String text;
- private String color;
+ private String bootstrapColor;
- Severity(int value, String text, String color) {
+ Severity(int value, String text, String bootstrapColor) {
this.value = value;
this.text = text;
- this.color = color;
+ this.bootstrapColor = bootstrapColor;
}
public int getValue() {
@@ -36,8 +36,8 @@ public String getText() {
return text;
}
- public String getColor() {
- return color;
+ public String getBootstrapColor() {
+ return bootstrapColor;
}
public static Severity byValue(int value) {
diff --git a/app/com/linkedin/drelephant/analysis/heuristics/ShuffleSortHeuristic.java b/app/com/linkedin/drelephant/analysis/heuristics/ShuffleSortHeuristic.java
index 5a48137c4..3f9b4852d 100644
--- a/app/com/linkedin/drelephant/analysis/heuristics/ShuffleSortHeuristic.java
+++ b/app/com/linkedin/drelephant/analysis/heuristics/ShuffleSortHeuristic.java
@@ -8,9 +8,6 @@
import com.linkedin.drelephant.hadoop.HadoopTaskData;
import com.linkedin.drelephant.math.Statistics;
-import java.util.Arrays;
-import java.util.Collections;
-
public class ShuffleSortHeuristic implements Heuristic {
public static final String heuristicName = "Shuffle & Sort";
@@ -21,7 +18,7 @@ public String getHeuristicName() {
@Override
public HeuristicResult apply(HadoopJobData data) {
- HadoopTaskData[] tasks = createSample(data.getReducerData());
+ HadoopTaskData[] tasks = Statistics.createSample(HadoopTaskData.class, data.getReducerData(), Constants.SHUFFLE_SORT_MAX_SAMPLE_SIZE);
//Gather data
fetchShuffleSort(tasks);
@@ -58,26 +55,6 @@ public HeuristicResult apply(HadoopJobData data) {
return result;
}
- private HadoopTaskData[] createSample(HadoopTaskData[] reducers) {
- int MAX_SAMPLE_SIZE = Constants.SHUFFLE_SORT_MAX_SAMPLE_SIZE;
-
- //Skip this process if number of items already smaller than sample size
- if (reducers.length <= MAX_SAMPLE_SIZE) {
- return reducers;
- }
-
- HadoopTaskData[] result = new HadoopTaskData[MAX_SAMPLE_SIZE];
-
- //Shuffle a clone copy
- HadoopTaskData[] clone = reducers.clone();
- Collections.shuffle(Arrays.asList(clone));
-
- //Take the first n items
- System.arraycopy(clone, 0, result, 0, MAX_SAMPLE_SIZE);
-
- return result;
- }
-
private void fetchShuffleSort(HadoopTaskData[] reducers) {
for (HadoopTaskData reducer : reducers) {
reducer.fetchTaskDetails();
diff --git a/app/com/linkedin/drelephant/hadoop/HadoopJobData.java b/app/com/linkedin/drelephant/hadoop/HadoopJobData.java
index b44a0b784..3432bb1c3 100644
--- a/app/com/linkedin/drelephant/hadoop/HadoopJobData.java
+++ b/app/com/linkedin/drelephant/hadoop/HadoopJobData.java
@@ -1,6 +1,7 @@
package com.linkedin.drelephant.hadoop;
import java.io.IOException;
+import java.util.Properties;
public class HadoopJobData {
private String username = "";
@@ -10,11 +11,13 @@ public class HadoopJobData {
private HadoopCounterHolder counterHolder;
private HadoopTaskData[] mapperData;
private HadoopTaskData[] reducerData;
+ private Properties jobConf;
- public HadoopJobData(HadoopCounterHolder counters, HadoopTaskData[] mappers, HadoopTaskData[] reducers) throws IOException {
+ public HadoopJobData(HadoopCounterHolder counters, HadoopTaskData[] mappers, HadoopTaskData[] reducers, Properties jobConf) throws IOException {
counterHolder = counters;
mapperData = mappers;
reducerData = reducers;
+ this.jobConf = jobConf;
}
public HadoopJobData setUsername(String username) {
@@ -49,6 +52,10 @@ public HadoopTaskData[] getReducerData() {
return reducerData;
}
+ public Properties getJobConf() {
+ return jobConf;
+ }
+
public String getUsername() {
return username;
}
diff --git a/app/com/linkedin/drelephant/hadoop/HadoopSecurity.java b/app/com/linkedin/drelephant/hadoop/HadoopSecurity.java
new file mode 100644
index 000000000..53e34d8c0
--- /dev/null
+++ b/app/com/linkedin/drelephant/hadoop/HadoopSecurity.java
@@ -0,0 +1,60 @@
+package com.linkedin.drelephant.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+import play.Play;
+
+import java.io.IOException;
+import java.security.PrivilegedAction;
+
+public class HadoopSecurity {
+ private static final Logger logger = Logger.getLogger(HadoopSecurity.class);
+
+ private UserGroupInformation loginUser = null;
+
+ private String keytabLocation;
+ private String keytabUser;
+ private boolean securityEnabled = false;
+
+ public HadoopSecurity() {
+ Configuration conf = new Configuration();
+ UserGroupInformation.setConfiguration(conf);
+ securityEnabled = UserGroupInformation.isSecurityEnabled();
+ if (securityEnabled) {
+ keytabLocation = Play.application().configuration().getString("keytab.location");
+ keytabUser = Play.application().configuration().getString("keytab.user");
+ checkLogin();
+ }
+ }
+
+ public UserGroupInformation getUGI() {
+ checkLogin();
+ return loginUser;
+ }
+
+ public void checkLogin() {
+ // try login
+ try {
+ if (loginUser == null) {
+ logger.info("No login user. Creating login user");
+        logger.info("Logging in with " + keytabUser + " and " + keytabLocation);
+ UserGroupInformation.loginUserFromKeytab(keytabUser, keytabLocation);
+ loginUser = UserGroupInformation.getLoginUser();
+ logger.info("Logged in with user " + loginUser);
+ } else {
+ loginUser.checkTGTAndReloginFromKeytab();
+ }
+ } catch (IOException e) {
+ logger.error("Failed to login with kerberos ", e);
+ }
+ }
+
+  public <T> T doAs(PrivilegedAction<T> action) {
+ UserGroupInformation ugi = getUGI();
+ if (ugi != null) {
+ return ugi.doAs(action);
+ }
+ return null;
+ }
+}
diff --git a/app/com/linkedin/drelephant/hadoop/HadoopTaskData.java b/app/com/linkedin/drelephant/hadoop/HadoopTaskData.java
index d23dc0234..926077843 100644
--- a/app/com/linkedin/drelephant/hadoop/HadoopTaskData.java
+++ b/app/com/linkedin/drelephant/hadoop/HadoopTaskData.java
@@ -95,7 +95,7 @@ public void fetchTaskDetails() {
return;
}
try {
- URL url = new URL(this.url);
+ URL url = new URL(HadoopTaskData.this.url);
AuthenticatedURL.Token token = new AuthenticatedURL.Token();
HttpURLConnection conn = new AuthenticatedURL().openConnection(url, token);
String data = IOUtils.toString(conn.getInputStream());
@@ -103,30 +103,8 @@ public void fetchTaskDetails() {
Elements rows = doc.select("table").select("tr");
for (int i = 1; i < rows.size(); i++) {
Element row = rows.get(i);
- Elements cells = row.select("> td");
- if (cells.size() < 12) {
- continue;
- }
- boolean succeeded = cells.get(2).html().trim().equals("SUCCEEDED");
- if (succeeded) {
- try {
- String startTime = cells.get(4).html().trim();
- String shuffleTime = cells.get(5).html().trim();
- String sortTime = cells.get(6).html().trim();
- if (shuffleTime.contains("(")) {
- shuffleTime = shuffleTime.substring(0, shuffleTime.indexOf("(") - 1);
- }
- if (sortTime.contains("(")) {
- sortTime = sortTime.substring(0, sortTime.indexOf("(") - 1);
- }
- long start = dateFormat.parse(startTime).getTime();
- long shuffle = dateFormat.parse(shuffleTime).getTime();
- long sort = dateFormat.parse(sortTime).getTime();
- this.shuffleTime = (shuffle - start);
- this.sortTime = (sort - shuffle);
- } catch (ParseException e) {
- //Ignored //e.printStackTrace();
- }
+ if (tryExtractDetailFromRow(row)) {
+ return;
}
}
} catch (IOException e) {
@@ -136,6 +114,37 @@ public void fetchTaskDetails() {
}
}
+ //Return true if successfully extracted data from row
+ private boolean tryExtractDetailFromRow(Element row) {
+ Elements cells = row.select("> td");
+ if (cells.size() < 12) {
+ return false;
+ }
+ boolean succeeded = cells.get(2).html().trim().equals("SUCCEEDED");
+ if (succeeded) {
+ try {
+ String startTime = cells.get(4).html().trim();
+ String shuffleTime = cells.get(5).html().trim();
+ String sortTime = cells.get(6).html().trim();
+ if (shuffleTime.contains("(")) {
+ shuffleTime = shuffleTime.substring(0, shuffleTime.indexOf("(") - 1);
+ }
+ if (sortTime.contains("(")) {
+ sortTime = sortTime.substring(0, sortTime.indexOf("(") - 1);
+ }
+ long start = dateFormat.parse(startTime).getTime();
+ long shuffle = dateFormat.parse(shuffleTime).getTime();
+ long sort = dateFormat.parse(sortTime).getTime();
+ HadoopTaskData.this.shuffleTime = (shuffle - start);
+ HadoopTaskData.this.sortTime = (sort - shuffle);
+ return true;
+ } catch (ParseException e) {
+ //Ignored //e.printStackTrace();
+ }
+ }
+ return false;
+ }
+
private String getTaskDetailsPage(RunningJob job, TaskID taskId) {
String jobDetails = job.getTrackingURL();
String root = jobDetails.substring(0, jobDetails.indexOf("jobdetails.jsp"));
diff --git a/app/com/linkedin/drelephant/math/Statistics.java b/app/com/linkedin/drelephant/math/Statistics.java
index 9bfab3719..32d32124a 100644
--- a/app/com/linkedin/drelephant/math/Statistics.java
+++ b/app/com/linkedin/drelephant/math/Statistics.java
@@ -2,7 +2,10 @@
import com.linkedin.drelephant.analysis.Severity;
+import java.lang.reflect.Array;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
public class Statistics {
@@ -73,13 +76,13 @@ private static long[][] two_means(long[] values, long middle) {
}
long[][] result = new long[2][];
- result[0] = toIntArray(smaller);
- result[1] = toIntArray(larger);
+ result[0] = toArray(smaller);
+ result[1] = toArray(larger);
return result;
}
-  private static long[] toIntArray(List<Long> input) {
+  private static long[] toArray(List<Long> input) {
long[] result = new long[input.size()];
for (int i = 0; i < result.length; i++) {
result[i] = input.get(i);
@@ -127,4 +130,23 @@ public static Severity getNumTasksSeverity(long numTasks) {
return Severity.getSeverityAscending(numTasks,
10, 50, 100, 200);
}
+
+  public static <T> T[] createSample(Class<T> clazz, T[] objects, int size) {
+ //Skip this process if number of items already smaller than sample size
+ if (objects.length <= size) {
+ return objects;
+ }
+
+ @SuppressWarnings("unchecked")
+ T[] result = (T[]) Array.newInstance(clazz, size);
+
+ //Shuffle a clone copy
+ T[] clone = objects.clone();
+ Collections.shuffle(Arrays.asList(clone));
+
+ //Take the first n items
+ System.arraycopy(clone, 0, result, 0, size);
+
+ return result;
+ }
}
diff --git a/app/com/linkedin/drelephant/notifications/EmailThread.java b/app/com/linkedin/drelephant/notifications/EmailThread.java
index d63ed784b..8eacbbe89 100644
--- a/app/com/linkedin/drelephant/notifications/EmailThread.java
+++ b/app/com/linkedin/drelephant/notifications/EmailThread.java
@@ -89,6 +89,12 @@ private void sendCriticalEmail(JobResult result) {
email.setSubject("Dr. Elephant - Hadoop Job Status Notification");
email.setHtmlMsg(html);
email.setDebug(true);
+ ///////////////////
+ //
+ // WARNING: This next line will send out the emails.
+ // Do NOT uncomment before proper testing and mental meditation.
+ //
+ ///////////////////
//email.send();
} catch (EmailException e) {
e.printStackTrace();
diff --git a/app/model/JobResult.java b/app/model/JobResult.java
index 2f33f43a5..e9b782a47 100644
--- a/app/model/JobResult.java
+++ b/app/model/JobResult.java
@@ -28,6 +28,9 @@ public class JobResult extends Model {
@Column
public Severity severity;
+ //@Column(length = 100)
+ //public String jobType;
+
@Column(length = 200)
public String url;
diff --git a/app/views/helpMapperDataSkew.scala.html b/app/views/helpMapperDataSkew.scala.html
index d50305d3a..fbf7733b6 100644
--- a/app/views/helpMapperDataSkew.scala.html
+++ b/app/views/helpMapperDataSkew.scala.html
@@ -32,4 +32,5 @@ Mapper Data Skew
Suggestions
Try to make your input files strictly smaller than the HDFS block size to avoid the small excess pieces creating new map tasks.
+    Understand why there's skew; use CombineFileInputFormat (or the equivalent in your language), and turn on slowstart to mitigate the effects.
\ No newline at end of file
diff --git a/app/views/helpMapperInputSize.scala.html b/app/views/helpMapperInputSize.scala.html
index 79e5580ce..dad7880fa 100644
--- a/app/views/helpMapperInputSize.scala.html
+++ b/app/views/helpMapperInputSize.scala.html
@@ -36,9 +36,9 @@ Suggestions
Set the number of mappers smaller by specifying a number or combining small files using Pig or Hive.
- For Hadoop/Java jobs: Use "jobConf.setNumMapTasks(NUMBER_OF_MAPPERS);"
- For Apache-Pig jobs: Try "set pig.maxCombinedSplitSize 536870912"
- For Apache-Hive jobs: Use "set hive.merge.mapredfiles=true"
+ For Hadoop/Java jobs: Try to use mapred.max.split.size in the job conf to split the input files appropriately.
+ For Apache-Pig jobs: Lower pig.maxCombinedSplitSize and set mapred.max.split.size to something smaller.
+ For Apache-Hive jobs: Try to use mapred.max.split.size in the job conf to split the input files appropriately.
Large files/Unsplittable files
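A rough sketch of the Hadoop/Java suggestion above, assuming the old mapred API used elsewhere in this patch; the property name comes from the help text, and the 512 MB value is only an example:

    import org.apache.hadoop.mapred.JobConf;

    public class SplitSizeExample {
      public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Adjust the maximum split size (in bytes) to control how the input is
        // split across mappers; 512 MB here is an arbitrary placeholder.
        jobConf.setLong("mapred.max.split.size", 512L * 1024 * 1024);
      }
    }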
diff --git a/app/views/helpMapperSpeed.scala.html b/app/views/helpMapperSpeed.scala.html
index 7107461d6..1e618c356 100644
--- a/app/views/helpMapperSpeed.scala.html
+++ b/app/views/helpMapperSpeed.scala.html
@@ -1,6 +1,6 @@
This analysis shows the effectiveness of your mapper code.
- This should allow you to determine if your mapper is CPU-bound.
+    This should allow you to determine whether your mapper is CPU-bound or is outputting huge amounts of data.
This result of the analysis shows problems with mappers with significant slow speeds for the amount of data it needs to read.
diff --git a/app/views/helpReducerDataSkew.scala.html b/app/views/helpReducerDataSkew.scala.html
index b4d5a2cfe..61831cacb 100644
--- a/app/views/helpReducerDataSkew.scala.html
+++ b/app/views/helpReducerDataSkew.scala.html
@@ -31,5 +31,8 @@
Reducer Data Skew
Suggestions
- ... Ask Hadoop-Dev
+ This is often caused by skew in the keyspace (aggregation key for group by, join key for joins).
+    If you are using Pig, try a skewed join.
+    Otherwise, consider what you're doing in the job and whether there's a better way to do it.
+    Only then talk to Hadoop-Dev.
\ No newline at end of file
diff --git a/app/views/helpReducerTime.scala.html b/app/views/helpReducerTime.scala.html
index 8644731f9..08daa3658 100644
--- a/app/views/helpReducerTime.scala.html
+++ b/app/views/helpReducerTime.scala.html
@@ -1,11 +1,11 @@
- This analysis shows the efficiency your reducers.
- This should allow you to better tweak the number of reducers for your job.
+ This analysis shows the efficiency of your reducers.
+ This should allow you to better adjust the number of reducers for your job.
There are two possible situations that needs some tuning.
Too many reducers
- This usually happens when the Hadoop job has:
+ This happens when the Hadoop job has:
- A large number of reducers
- Short reducer runtime
@@ -34,7 +34,7 @@ Reducer Time
Too few reducers
- This usually happens when the Hadoop job has:
+ This happens when the Hadoop job has:
- A small number of reducers
- Long reducer runtime
@@ -66,7 +66,7 @@ Suggestions
Set the number of reducers by specifying a better number in your Hadoop job.
For Hadoop/Java jobs: Use "jobConf.setNumReduceTasks(NUMBER_OF_REDUCERS);"
- For Apache-Pig jobs: Use "set pig.exec.reducers.max NUMBER_OF_REDUCERS"
+    For Apache-Pig jobs: Use the PARALLEL [num] clause on the operator which caused this job (though this will probably be hard for people to understand without Lipstick)
For Apache-Hive jobs: Use "set mapred.reduce.tasks=NUMBER_OF_REDUCERS"
(change NUMBER_OF_TASKS to an appropriate number of tasks)
\ No newline at end of file
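For the Hadoop/Java case, a minimal sketch of the suggestion above; the reducer count is a placeholder to be chosen from the analysis, not a recommended value:

    import org.apache.hadoop.mapred.JobConf;

    public class ReducerCountExample {
      public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Explicitly set the number of reduce tasks; pick a number that gives
        // each reducer a reasonable amount of data and runtime.
        jobConf.setNumReduceTasks(100);
      }
    }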
diff --git a/app/views/helpShuffleSort.scala.html b/app/views/helpShuffleSort.scala.html
index 6d795c35b..7db398b4e 100644
--- a/app/views/helpShuffleSort.scala.html
+++ b/app/views/helpShuffleSort.scala.html
@@ -1,5 +1,5 @@
- This analysis shows the how much time the reducer spends in shuffle and sort steps versus in the reducer code.
+ This analysis shows how much time the reducer spends in shuffle and sort steps versus in the reducer code.
This should allow you to understand the efficiency of your reducer.
@@ -36,5 +36,5 @@
Shuffle & Sort
Suggestions
- ...Ask Hadoop-Dev
+    If your shuffle time is high but your sort time is low, you likely need to turn slowstart on.
\ No newline at end of file
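A minimal sketch of turning slowstart on for a Hadoop/Java job; the property name and the 0.80 threshold are assumptions appropriate to the Hadoop 1.x JobTracker setup this patch targets, not values taken from the help text:

    import org.apache.hadoop.mapred.JobConf;

    public class SlowstartExample {
      public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Delay launching reducers until 80% of the map tasks have completed,
        // so reducers spend less wall-clock time idling in the shuffle phase.
        jobConf.setFloat("mapred.reduce.slowstart.completed.maps", 0.80f);
      }
    }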
diff --git a/app/views/multijob.scala.html b/app/views/multijob.scala.html
index b7bf0d20a..052fc328e 100644
--- a/app/views/multijob.scala.html
+++ b/app/views/multijob.scala.html
@@ -6,11 +6,11 @@ @title
@for(heuristicResult <- result.heuristicResults) {
-
+
@heuristicResult.analysisName
@defining(heuristicResult.getDataArray()) { data =>
diff --git a/conf/application.conf b/conf/application.conf
index 16de04bd8..aa3685e8a 100644
--- a/conf/application.conf
+++ b/conf/application.conf
@@ -79,4 +79,8 @@ smtp.host=email.corp.linkedin.com
smtp.port=25
smtp.from="azkaban-noreply@linkedin.com"
# smtp.user=azkaban-noreply
-# smtp.password=
\ No newline at end of file
+# smtp.password=
+
+#Kerberos
+keytab.location="/export/apps/hadoop/keytabs/dr_elephant-service.keytab"
+keytab.user="elephant/eat1-magicaz01.grid.linkedin.com"
\ No newline at end of file
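These two settings are read by the new HadoopSecurity class through Play's configuration. A minimal usage sketch, mirroring what ElephantRunner now does inside the running application (the body of run() is a stand-in for the real work):

    import com.linkedin.drelephant.hadoop.HadoopSecurity;

    import java.security.PrivilegedAction;

    public class SecureWorkExample {
      public static void main(String[] args) {
        // Logs in from the configured keytab when Hadoop security is enabled.
        final HadoopSecurity hadoopSecurity = new HadoopSecurity();
        hadoopSecurity.doAs(new PrivilegedAction<Void>() {
          @Override
          public Void run() {
            // Re-check the Kerberos TGT before talking to the cluster, then do
            // the privileged work (fetching job lists, job data, and so on).
            hadoopSecurity.checkLogin();
            return null;
          }
        });
      }
    }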