From a045b64d60facd28e58a4a856f24fb78d4817e7d Mon Sep 17 00:00:00 2001 From: Anthony Hsu Date: Tue, 14 May 2019 14:33:24 -0700 Subject: [PATCH] Fix #553: Add support for TonY jobs (#563) * Checkpoint * Skeleton code for TonY jobtype support in Dr. Elephant #553 * Removed some commented-out code, removed "TonY" from "TonY Task Memory" * Implement TonyFetcher * Check if config file exists before trying to open it * Add comment explaining yyyy/MM/dd calculation issue * Implement TonY TaskMemoryHeuristic * Minor tweaks to test case in TaskMemoryHeuristicTest * Implement TonyMetricsAggregator * Remove unused imports, update compile.sh to overwrite when unzipping without prompting * Fix NPE bug and print task information even when no metrics * Add license headers and remove JDK7 from .travis.yml * Fix Checkstyle link * Fix Checkstyle issues * Add ' for TonY jobType in test JobTypeConf.xml * Add comments explain the events we parse and what information we extract from them --- .gitignore | 3 + .travis.yml | 1 - app-conf/AggregatorConf.xml | 4 + app-conf/FetcherConf.xml | 9 + app-conf/HeuristicConf.xml | 16 ++ app-conf/JobTypeConf.xml | 6 + .../drelephant/analysis/HeuristicResult.java | 4 +- .../mapreduce/TaskLevelAggregatedMetrics.java | 2 +- .../tony/TonyMetricsAggregator.java | 89 ++++++++++ .../tony/data/TonyApplicationData.java | 123 ++++++++++++++ .../drelephant/tony/data/TonyTaskData.java | 88 ++++++++++ .../drelephant/tony/fetchers/TonyFetcher.java | 93 ++++++++++ .../tony/heuristics/TaskMemoryHeuristic.java | 121 +++++++++++++ .../drelephant/tony/util/TonyUtils.java | 50 ++++++ app/models/AppResult.java | 1 - app/views/help/tony/helpTaskMemory.scala.html | 18 ++ build.sbt | 2 +- compile.sh | 8 +- project/Dependencies.scala | 10 +- project/checkstyle-config.xml | 4 +- project/checkstyle-java.header | 30 ++-- .../tony/TonyMetricsAggregatorTest.java | 83 +++++++++ .../tony/fetchers/TonyFetcherTest.java | 127 ++++++++++++++ .../heuristics/TaskMemoryHeuristicTest.java | 159 ++++++++++++++++++ test/resources/JobTypeConf.xml | 6 + 25 files changed, 1027 insertions(+), 30 deletions(-) create mode 100644 app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java create mode 100644 app/com/linkedin/drelephant/tony/data/TonyApplicationData.java create mode 100644 app/com/linkedin/drelephant/tony/data/TonyTaskData.java create mode 100644 app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java create mode 100644 app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java create mode 100644 app/com/linkedin/drelephant/tony/util/TonyUtils.java create mode 100644 app/views/help/tony/helpTaskMemory.scala.html create mode 100644 test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java create mode 100644 test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java create mode 100644 test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java diff --git a/.gitignore b/.gitignore index f6b110e89..0688eb701 100644 --- a/.gitignore +++ b/.gitignore @@ -17,10 +17,12 @@ # general *~ *.log +dr_elephant.log.* tmp dump .history /*.iml +/dr-elephant-*/ /out # Eclipse @@ -58,3 +60,4 @@ public/assets/ember/ public/assets/fonts/ web/bower_components/ web/node_modules/ +web/package-lock.json diff --git a/.travis.yml b/.travis.yml index 93988a913..33ac965d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: scala sudo: true jdk: - oraclejdk8 - - openjdk7 python: "2.6" install: - sudo pip install inspyred diff --git a/app-conf/AggregatorConf.xml b/app-conf/AggregatorConf.xml index 1536b2d96..3fecc3300 100644 --- a/app-conf/AggregatorConf.xml +++ b/app-conf/AggregatorConf.xml @@ -44,4 +44,8 @@ 0.5 + + tony + com.linkedin.drelephant.tony.TonyMetricsAggregator + diff --git a/app-conf/FetcherConf.xml b/app-conf/FetcherConf.xml index 72c65c987..1f5e49308 100644 --- a/app-conf/FetcherConf.xml +++ b/app-conf/FetcherConf.xml @@ -116,4 +116,13 @@ --> + + + diff --git a/app-conf/HeuristicConf.xml b/app-conf/HeuristicConf.xml index 59985ee1d..9efa2f228 100644 --- a/app-conf/HeuristicConf.xml +++ b/app-conf/HeuristicConf.xml @@ -354,4 +354,20 @@ views.html.help.spark.helpExecutorGcHeuristic + + + + tony + Task Memory + com.linkedin.drelephant.tony.heuristics.TaskMemoryHeuristic + views.html.help.tony.helpTaskMemory + + + + + + + + + diff --git a/app-conf/JobTypeConf.xml b/app-conf/JobTypeConf.xml index 90e20f113..693e39db8 100644 --- a/app-conf/JobTypeConf.xml +++ b/app-conf/JobTypeConf.xml @@ -86,4 +86,10 @@ mapred.child.java.opts + + TonY + TONY + tony.application.name + + diff --git a/app/com/linkedin/drelephant/analysis/HeuristicResult.java b/app/com/linkedin/drelephant/analysis/HeuristicResult.java index b4519e982..ffb6511ea 100644 --- a/app/com/linkedin/drelephant/analysis/HeuristicResult.java +++ b/app/com/linkedin/drelephant/analysis/HeuristicResult.java @@ -40,7 +40,7 @@ public class HeuristicResult { * Heuristic Result Constructor * * @param heuristicClass The Heuristic class - * @param heuristicName The name of the Heursitic + * @param heuristicName The name of the Heuristic * @param severity The severity of the result * @param score The computed score */ @@ -73,7 +73,7 @@ public HeuristicResult(String heuristicClass, String heuristicName, Severity sev /** * Returns the heuristic analyser class name * - * @return the heursitic class name + * @return the heuristic class name */ public String getHeuristicClassName() { return _heuristicClass; diff --git a/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java b/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java index 5668682be..463f362b6 100644 --- a/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java +++ b/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java @@ -142,7 +142,7 @@ private void compute(MapReduceTaskData[] taskDatas, long containerSize, long ide } // wastedResources - long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER); // give a 50% buffer + long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER); // give a 50% buffer if(wastedMemory > 0) { for (long duration : durations) { _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds diff --git a/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java b/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java new file mode 100644 index 000000000..d94fdce6d --- /dev/null +++ b/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java @@ -0,0 +1,89 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony; + +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.drelephant.analysis.HadoopAggregatedData; +import com.linkedin.drelephant.analysis.HadoopApplicationData; +import com.linkedin.drelephant.analysis.HadoopMetricsAggregator; +import com.linkedin.drelephant.configurations.aggregator.AggregatorConfigurationData; +import com.linkedin.drelephant.math.Statistics; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.drelephant.tony.data.TonyTaskData; +import com.linkedin.drelephant.tony.util.TonyUtils; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import java.util.Map; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; + + +public class TonyMetricsAggregator implements HadoopMetricsAggregator { + @VisibleForTesting + static final double MEMORY_BUFFER = 1.5; + + private HadoopAggregatedData _hadoopAggregatedData; + + /** + * Creates a new {@code TonyMetricsAggregator}. + * @param unused Dr. Elephant expects a constructor of this form but {@code TonyMetricsAggregator} does not need this + */ + public TonyMetricsAggregator(AggregatorConfigurationData unused) { } + + @Override + public void aggregate(HadoopApplicationData data) { + _hadoopAggregatedData = new HadoopAggregatedData(); + + TonyApplicationData tonyData = (TonyApplicationData) data; + Configuration tonyConf = tonyData.getConfiguration(); + + long mbSecUsed = 0; + long mbSecWasted = 0; + + Map> taskMap = tonyData.getTaskMap(); + for (Map.Entry> entry : taskMap.entrySet()) { + String taskType = entry.getKey(); + + String memoryString = tonyConf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY)); + String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString); + long mbRequested = Long.parseLong(memoryStringMB); + double maxMemoryMBUsed = TonyUtils.getMaxMemoryBytesUsedForTaskType(taskMap, taskType) / FileUtils.ONE_MB; + + for (TonyTaskData taskData : entry.getValue().values()) { + long taskDurationSec = (taskData.getTaskEndTime() - taskData.getTaskStartTime()) / Statistics.SECOND_IN_MS; + mbSecUsed += mbRequested * taskDurationSec; + + if (maxMemoryMBUsed <= 0) { + // If we don't have max memory metrics, don't calculate wasted memory. + continue; + } + long wastedMemory = (long) (mbRequested - maxMemoryMBUsed * MEMORY_BUFFER); + if (wastedMemory > 0) { + mbSecWasted += wastedMemory * taskDurationSec; + } + } + } + + _hadoopAggregatedData.setResourceUsed(mbSecUsed); + _hadoopAggregatedData.setResourceWasted(mbSecWasted); + // TODO: Calculate and set delay + } + + @Override + public HadoopAggregatedData getResult() { + return _hadoopAggregatedData; + } +} diff --git a/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java b/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java new file mode 100644 index 000000000..a4f03a952 --- /dev/null +++ b/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java @@ -0,0 +1,123 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.data; + +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.analysis.HadoopApplicationData; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.TaskFinished; +import com.linkedin.tony.events.TaskStarted; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.hadoop.conf.Configuration; + + +public class TonyApplicationData implements HadoopApplicationData { + private String _appId; + private ApplicationType _appType; + private Configuration _configuration; + private Properties _props; + private Map> _taskMap; + + /** + * Constructor for {@code TonyApplicationData}. + * @param appId application id + * @param appType application type (should be TONY) + * @param configuration the configuration for this application + * @param events the events emitted by this application + */ + public TonyApplicationData(String appId, ApplicationType appType, Configuration configuration, List events) { + _appId = appId; + _appType = appType; + + _configuration = configuration; + _props = new Properties(); + for (Map.Entry entry : configuration) { + _props.setProperty(entry.getKey(), entry.getValue()); + } + + _taskMap = new HashMap<>(); + processEvents(events); + } + + @Override + public String getAppId() { + return _appId; + } + + /** + * Returns the {@link Configuration} for this application. + * @return the configuration for this application + */ + public Configuration getConfiguration() { + return _configuration; + } + + @Override + public Properties getConf() { + return _props; + } + + @Override + public ApplicationType getApplicationType() { + return _appType; + } + + /** + * Returns a map of task data. + * @return a map from task type to a map of task index to task data + */ + public Map> getTaskMap() { + return _taskMap; + } + + @Override + public boolean isEmpty() { + return false; + } + + private void initTaskMap(String taskType, int taskIndex) { + if (!_taskMap.containsKey(taskType)) { + _taskMap.put(taskType, new HashMap<>()); + } + + if (!_taskMap.get(taskType).containsKey(taskIndex)) { + _taskMap.get(taskType).put(taskIndex, new TonyTaskData(taskType, taskIndex)); + } + } + + private void processEvents(List events) { + for (Event event : events) { + if (event.getType().equals(EventType.TASK_STARTED)) { + TaskStarted taskStartedEvent = (TaskStarted) event.getEvent(); + String taskType = taskStartedEvent.getTaskType(); + int taskIndex = taskStartedEvent.getTaskIndex(); + initTaskMap(taskType, taskIndex); + _taskMap.get(taskType).get(taskIndex).setTaskStartTime(event.getTimestamp()); + } else if (event.getType().equals(EventType.TASK_FINISHED)) { + TaskFinished taskFinishedEvent = (TaskFinished) event.getEvent(); + String taskType = taskFinishedEvent.getTaskType(); + int taskIndex = taskFinishedEvent.getTaskIndex(); + initTaskMap(taskType, taskIndex); + _taskMap.get(taskType).get(taskIndex).setTaskEndTime(event.getTimestamp()); + _taskMap.get(taskType).get(taskIndex).setMetrics(taskFinishedEvent.getMetrics()); + } + } + } +} diff --git a/app/com/linkedin/drelephant/tony/data/TonyTaskData.java b/app/com/linkedin/drelephant/tony/data/TonyTaskData.java new file mode 100644 index 000000000..40066f325 --- /dev/null +++ b/app/com/linkedin/drelephant/tony/data/TonyTaskData.java @@ -0,0 +1,88 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.data; + +import com.linkedin.tony.events.Metric; +import java.util.List; + + +public class TonyTaskData { + private final String _taskType; + private final int _taskIndex; + + private long _taskStartTime; + private long _taskEndTime; + private List _metrics; + + /** + * Creates a new {@code TonyTaskData} encapsulating a task's data, such as task type, index, start time, + * end time, and metrics. + * @param taskType the task type + * @param taskIndex the task index + */ + public TonyTaskData(String taskType, int taskIndex) { + _taskType = taskType; + _taskIndex = taskIndex; + } + + /** + * Get task start time. + * @return the task start time + */ + public long getTaskStartTime() { + return _taskStartTime; + } + + /** + * Set task start time. + * @param taskStartTime the task start time + */ + public void setTaskStartTime(long taskStartTime) { + _taskStartTime = taskStartTime; + } + + /** + * Get task end time. + * @return the task end time + */ + public long getTaskEndTime() { + return _taskEndTime; + } + + /** + * Set task end time. + * @param taskEndTime the end time + */ + public void setTaskEndTime(long taskEndTime) { + _taskEndTime = taskEndTime; + } + + /** + * Get the metrics for this task. + * @return the metrics + */ + public List getMetrics() { + return _metrics; + } + + /** + * Sets the metrics for this task. + * @param metrics the metrics + */ + public void setMetrics(List metrics) { + _metrics = metrics; + } +} diff --git a/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java b/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java new file mode 100644 index 000000000..2687b1d11 --- /dev/null +++ b/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java @@ -0,0 +1,93 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.fetchers; + +import com.linkedin.drelephant.analysis.AnalyticJob; +import com.linkedin.drelephant.analysis.ElephantFetcher; +import com.linkedin.drelephant.configurations.fetcher.FetcherConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.util.ParserUtils; +import java.io.IOException; +import java.util.Date; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.Logger; + + +public class TonyFetcher implements ElephantFetcher { + private static final Logger _LOGGER = Logger.getLogger(TonyFetcher.class); + private final Path _finishedDir; + private final FileSystem _fs; + + /** + * Constructor for {@link TonyFetcher}. + * @param fetcherConfig the fetcher configuration + * @throws IOException + */ + public TonyFetcher(FetcherConfigurationData fetcherConfig) throws IOException { + Configuration conf = new Configuration(); + + String tonyConfDir = System.getenv(Constants.TONY_CONF_DIR); + if (fetcherConfig.getParamMap().containsKey(Constants.TONY_CONF_DIR)) { + tonyConfDir = fetcherConfig.getParamMap().get(Constants.TONY_CONF_DIR); + } + _LOGGER.info("Using TonY conf dir: " + tonyConfDir); + + conf.addResource(new Path(tonyConfDir + Path.SEPARATOR + Constants.TONY_SITE_CONF)); + _finishedDir = new Path(conf.get(TonyConfigurationKeys.TONY_HISTORY_FINISHED)); + _fs = _finishedDir.getFileSystem(conf); + } + + @Override + public TonyApplicationData fetchData(AnalyticJob job) throws Exception { + _LOGGER.debug("Fetching data for job " + job.getAppId()); + long finishTime = job.getFinishTime(); + Date date = new Date(finishTime); + + // TODO: We are deriving yyyy/MM/dd from the RM's application finish time, but the TonY Portal actually creates the + // yyyy/MM/dd directory based off the end time embedded in the jhist file name. This end time is taken before the + // application finishes and thus is slightly before the RM's application finish time. So it's possible that the + // yyyy/MM/dd derived from the RM's application finish time is a day later and we may not find the history files. + // In case we don't find the history files in yyyy/MM/dd, we should check the previous day as well. + String yearMonthDay = ParserUtils.getYearMonthDayDirectory(date); + Path jobDir = new Path(_finishedDir, yearMonthDay + Path.SEPARATOR + job.getAppId()); + _LOGGER.debug("Job directory for " + job.getAppId() + ": " + jobDir); + + // parse config + Path confFile = new Path(jobDir, Constants.TONY_FINAL_XML); + if (!_fs.exists(confFile)) { + // for backward compatibility, see https://github.com/linkedin/TonY/issues/271 + confFile = new Path(jobDir, "config.xml"); + } + Configuration conf = new Configuration(false); + if (_fs.exists(confFile)) { + conf.addResource(_fs.open(confFile)); + } + + // Parse events. For a list of event types, see + // https://github.com/linkedin/TonY/blob/master/tony-core/src/main/avro/EventType.avsc. + // We get the task start time from the TASK_STARTED event and the finish time and metrics from the TASK_FINISHED + // event. + List events = ParserUtils.parseEvents(_fs, jobDir); + + return new TonyApplicationData(job.getAppId(), job.getAppType(), conf, events); + } +} diff --git a/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java b/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java new file mode 100644 index 000000000..1cb6ce660 --- /dev/null +++ b/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java @@ -0,0 +1,121 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.heuristics; + +import com.linkedin.drelephant.analysis.Heuristic; +import com.linkedin.drelephant.analysis.HeuristicResult; +import com.linkedin.drelephant.analysis.HeuristicResultDetails; +import com.linkedin.drelephant.analysis.Severity; +import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.drelephant.tony.data.TonyTaskData; +import com.linkedin.drelephant.tony.util.TonyUtils; +import com.linkedin.drelephant.util.Utils; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.log4j.Logger; + + +/** + * For each type of task (e.g.: worker, ps), this heuristic checks the max memory used across all tasks of that type + * and compares against some thresholds. The final severity is the highest severity across all task types. + */ +public class TaskMemoryHeuristic implements Heuristic { + private static final Logger _LOGGER = Logger.getLogger(TaskMemoryHeuristic.class); + private static final int DEFAULT_CONTAINER_MEMORY_MB = 2048; + private static final String CONTAINER_MEMORY_DEFAULT_MB_CONF = "container_memory_default_mb"; + private static final String TASK_MEMORY_THRESHOLDS_CONF = "task_memory_thresholds"; + + private HeuristicConfigurationData _heuristicConfData; + private long defaultContainerMemoryBytes = DEFAULT_CONTAINER_MEMORY_MB * FileUtils.ONE_MB; + + // Initialized to default max memory thresholds + private double[] maxMemoryLimits = {0.8, 0.7, 0.6, 0.5}; + + /** + * Constructor for {@link TaskMemoryHeuristic}. + * @param heuristicConfData the configuration for this heuristic + */ + public TaskMemoryHeuristic(HeuristicConfigurationData heuristicConfData) { + this._heuristicConfData = heuristicConfData; + + Map params = heuristicConfData.getParamMap(); + // read default container size + if (params.containsKey(CONTAINER_MEMORY_DEFAULT_MB_CONF)) { + defaultContainerMemoryBytes = Long.parseLong(params.get(CONTAINER_MEMORY_DEFAULT_MB_CONF)) * FileUtils.ONE_MB; + } + // read max memory thresholds + if (params.containsKey(TASK_MEMORY_THRESHOLDS_CONF)) { + maxMemoryLimits = Utils.getParam(params.get(TASK_MEMORY_THRESHOLDS_CONF), maxMemoryLimits.length); + } + } + + @Override + public HeuristicResult apply(TonyApplicationData data) { + _LOGGER.debug("Applying TaskMemoryHeuristic"); + Map> taskMap = data.getTaskMap(); + Configuration conf = data.getConfiguration(); + + Set taskTypes = com.linkedin.tony.util.Utils.getAllJobTypes(conf); + Severity finalSeverity = Severity.NONE; + List details = new ArrayList<>(); + + for (String taskType : taskTypes) { + details.add(new HeuristicResultDetails("Number of " + taskType + " tasks", + Integer.toString(taskMap.get(taskType).size()))); + + // get per task memory requested + String memoryString = conf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY)); + String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString); + long taskBytesRequested = Long.parseLong(memoryStringMB) * FileUtils.ONE_MB; + details.add(new HeuristicResultDetails("Requested memory (MB) per " + taskType + " task", + Long.toString(taskBytesRequested / FileUtils.ONE_MB))); + + // get global max memory per task + double maxMemoryBytesUsed = TonyUtils.getMaxMemoryBytesUsedForTaskType(taskMap, taskType); + if (maxMemoryBytesUsed <= 0) { + details.add(new HeuristicResultDetails("Max memory (MB) used in any " + taskType + " task", "Unknown")); + continue; + } + details.add(new HeuristicResultDetails("Max memory (MB) used in any " + taskType + " task", + Long.toString((long) maxMemoryBytesUsed / FileUtils.ONE_MB))); + + // compare to threshold and update severity + if (taskBytesRequested <= defaultContainerMemoryBytes) { + // If using default container memory, automatic pass + continue; + } + double maxMemoryRatio = maxMemoryBytesUsed / taskBytesRequested; + Severity taskMemorySeverity = Severity.getSeverityDescending(maxMemoryRatio, maxMemoryLimits[0], + maxMemoryLimits[1], maxMemoryLimits[2], maxMemoryLimits[3]); + finalSeverity = Severity.max(finalSeverity, taskMemorySeverity); + } + + return new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), finalSeverity, + 0, details); + } + + @Override + public HeuristicConfigurationData getHeuristicConfData() { + return _heuristicConfData; + } +} diff --git a/app/com/linkedin/drelephant/tony/util/TonyUtils.java b/app/com/linkedin/drelephant/tony/util/TonyUtils.java new file mode 100644 index 000000000..bab427184 --- /dev/null +++ b/app/com/linkedin/drelephant/tony/util/TonyUtils.java @@ -0,0 +1,50 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.util; + +import com.linkedin.drelephant.tony.data.TonyTaskData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.events.Metric; +import java.util.List; +import java.util.Map; + + +public class TonyUtils { + /** + * Returns the max memory in bytes used by any task of the specified type. + * @param taskMap a map containing data for all tasks + * @param taskType the task type + * @return the max memory in bytes used by any task of the specified type + */ + public static double getMaxMemoryBytesUsedForTaskType(Map> taskMap, + String taskType) { + double maxMemoryBytesUsed = 0; + for (TonyTaskData taskData : taskMap.get(taskType).values()) { + List metrics = taskData.getMetrics(); + if (metrics == null) { + return -1; + } + for (Metric metric : metrics) { + if (metric.getName().equals(Constants.MAX_MEMORY_BYTES)) { + if (metric.getValue() > maxMemoryBytesUsed) { + maxMemoryBytesUsed = metric.getValue(); + } + } + } + } + return maxMemoryBytesUsed; + } +} diff --git a/app/models/AppResult.java b/app/models/AppResult.java index e7c9c7f6c..cca925007 100644 --- a/app/models/AppResult.java +++ b/app/models/AppResult.java @@ -20,7 +20,6 @@ import com.linkedin.drelephant.analysis.Severity; import com.linkedin.drelephant.util.Utils; -import java.util.Date; import play.db.ebean.Model; import java.util.List; diff --git a/app/views/help/tony/helpTaskMemory.scala.html b/app/views/help/tony/helpTaskMemory.scala.html new file mode 100644 index 000000000..2cfc1031c --- /dev/null +++ b/app/views/help/tony/helpTaskMemory.scala.html @@ -0,0 +1,18 @@ +@* +* Copyright 2019 LinkedIn Corp. +* +* Licensed under the Apache License, Version 2.0 (the "License"); you may not +* use this file except in compliance with the License. You may obtain a copy of +* the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +* License for the specific language governing permissions and limitations under +* the License. +*@ +

+ Don't use so much memory! +

\ No newline at end of file diff --git a/build.sbt b/build.sbt index 5aae3a90d..eb24585b2 100644 --- a/build.sbt +++ b/build.sbt @@ -26,7 +26,7 @@ organization := "com.linkedin.drelephant" // Enable CPD SBT plugin lazy val root = (project in file(".")).enablePlugins(CopyPasteDetector) -javacOptions in Compile ++= Seq("-source", "1.6", "-target", "1.6") +javacOptions in Compile ++= Seq("-source", "1.8", "-target", "1.8") libraryDependencies ++= dependencies map { _.excludeAll(exclusionRules: _*) } diff --git a/compile.sh b/compile.sh index 3c09cbedb..7616dcb1b 100755 --- a/compile.sh +++ b/compile.sh @@ -427,7 +427,7 @@ if [ $run_StyleChecks = "y" ]; then fi set -v -set -x +set -ex # Echo the value of pwd in the script so that it is clear what is being removed. rm -rf ${project_root}/dist mkdir dist @@ -436,8 +436,8 @@ play_command $OPTS dist cd target/universal -ZIP_NAME=`/bin/ls *.zip` -unzip ${ZIP_NAME} +ZIP_NAME=`ls *.zip` +unzip -o ${ZIP_NAME} rm ${ZIP_NAME} DIST_NAME=${ZIP_NAME%.zip} @@ -452,7 +452,7 @@ cp $stop_script ${DIST_NAME}/bin/ cp -r $app_conf ${DIST_NAME} -mkdir ${DIST_NAME}/scripts/ +mkdir -p ${DIST_NAME}/scripts/ cp -r $pso_dir ${DIST_NAME}/scripts/ diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 533727a6e..597952c2f 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -23,13 +23,14 @@ object Dependencies { lazy val commonsCodecVersion = "1.10" lazy val commonsIoVersion = "2.4" lazy val gsonVersion = "2.2.4" - lazy val guavaVersion = "18.0" // Hadoop defaultly are using guava 11.0, might raise NoSuchMethodException + lazy val guavaVersion = "18.0" // Hadoop by default uses Guava 11.0, might raise NoSuchMethodException lazy val jacksonMapperAslVersion = "1.7.3" lazy val jacksonVersion = "2.5.3" lazy val jerseyVersion = "2.24" lazy val jsoupVersion = "1.7.3" lazy val mysqlConnectorVersion = "5.1.36" lazy val oozieClientVersion = "4.2.0" + lazy val tonyVersion = "0.3.6" lazy val HADOOP_VERSION = "hadoopversion" lazy val SPARK_VERSION = "sparkversion" @@ -95,8 +96,11 @@ object Dependencies { "org.apache.httpcomponents" % "httpclient" % "4.5.2", "org.apache.httpcomponents" % "httpcore" % "4.4.4", "org.scalatest" %% "scalatest" % "3.0.0" % Test, - "com.h2database" % "h2" % "1.4.196" % Test - + "com.h2database" % "h2" % "1.4.196" % Test, + "com.linkedin.tony" % "tony-core" % tonyVersion excludeAll( + ExclusionRule(organization = "com.fasterxml.jackson.core"), + ExclusionRule(organization = "org.apache.hadoop") + ) ) :+ sparkExclusion var dependencies = Seq(javaJdbc, javaEbean, cache) diff --git a/project/checkstyle-config.xml b/project/checkstyle-config.xml index 565b3cc7d..187628b01 100644 --- a/project/checkstyle-config.xml +++ b/project/checkstyle-config.xml @@ -14,7 +14,7 @@ License for the specific language governing permissions and limitations under the License. --> - + @@ -32,7 +32,7 @@ - + diff --git a/project/checkstyle-java.header b/project/checkstyle-java.header index 3c8eb723e..659cce4e6 100644 --- a/project/checkstyle-java.header +++ b/project/checkstyle-java.header @@ -1,15 +1,15 @@ -/* - * Copyright 2016 LinkedIn Corp. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ +^/\*$ +^ \* Copyright \d{4} LinkedIn Corp.$ +^ \*$ +^ \* Licensed under the Apache License, Version 2.0 \(the "License"\); you may not$ +^ \* use this file except in compliance with the License. You may obtain a copy of$ +^ \* the License at$ +^ \*$ +^ \* http://www.apache.org/licenses/LICENSE-2.0$ +^ \*$ +^ \* Unless required by applicable law or agreed to in writing, software$ +^ \* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT$ +^ \* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the$ +^ \* License for the specific language governing permissions and limitations under$ +^ \* the License.$ +^ \*/$ diff --git a/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java b/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java new file mode 100644 index 000000000..dec9058d2 --- /dev/null +++ b/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java @@ -0,0 +1,83 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony; + +import com.google.common.collect.ImmutableList; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.analysis.HadoopAggregatedData; +import com.linkedin.drelephant.math.Statistics; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import com.linkedin.tony.events.TaskStarted; +import com.linkedin.tony.rpc.impl.TaskStatus; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + +import static com.linkedin.drelephant.tony.TonyMetricsAggregator.MEMORY_BUFFER; + + +public class TonyMetricsAggregatorTest { + /** + * Low memory utilization but default container size, so pass. + */ + @Test + public void testMetricsAggregator() { + Configuration conf = new Configuration(false); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.WORKER_JOB_NAME), 2); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME), 1); + + List events = new ArrayList<>(); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 1, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.PS_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) FileUtils.ONE_GB))), + 10L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 1, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) 2 * FileUtils.ONE_GB))), + 20L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.PS_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) FileUtils.ONE_GB))), + 20L * Statistics.SECOND_IN_MS)); + + long expectedResourcesUsed = 10 * 4 * 1024 + 20 * 4 * 1024 + 20 * 4 * 1024; + long expectedResourcesWasted = 10 * (long) (4 * 1024 - 2 * 1024 * MEMORY_BUFFER) + + 20 * (long) (4 * 1024 - 2 * 1024 * MEMORY_BUFFER) + + 20 * (long) (4 * 1024 - 1 * 1024 * MEMORY_BUFFER); + + ApplicationType appType = new ApplicationType(Constants.APP_TYPE); + TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events); + TonyMetricsAggregator metricsAggregator = new TonyMetricsAggregator(null); + metricsAggregator.aggregate(data); + HadoopAggregatedData result = metricsAggregator.getResult(); + Assert.assertEquals(expectedResourcesUsed, result.getResourceUsed()); + Assert.assertEquals(expectedResourcesWasted, result.getResourceWasted()); + } +} diff --git a/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java b/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java new file mode 100644 index 000000000..e2348656d --- /dev/null +++ b/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java @@ -0,0 +1,127 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.fetchers; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Files; +import com.linkedin.drelephant.analysis.AnalyticJob; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.configurations.fetcher.FetcherConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.drelephant.tony.data.TonyTaskData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.Map; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class TonyFetcherTest { + private static final String APPLICATION_ID = "application_123_456"; + private static File _finishedDir; + private static String _tonyConfDir; + private static Date _endDate; + + @BeforeClass + public static void setup() throws IOException, ParseException { + setupFinishedApplicationDir(); + setupTestTonyConfDir(); + } + + private static void setupFinishedApplicationDir() throws IOException, ParseException { + String yearMonthDay = "2019/05/02"; + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + _endDate = sdf.parse(yearMonthDay); + + File tempDir = Files.createTempDir(); + _finishedDir = new File(tempDir, "finished"); + File appDir = new File(_finishedDir, yearMonthDay + Path.SEPARATOR + APPLICATION_ID); + appDir.mkdirs(); + + Configuration conf = new Configuration(false); + conf.set("foo", "bar"); + + File configFile = new File(appDir, Constants.TONY_FINAL_XML); + conf.writeXml(new FileOutputStream(configFile)); + + Event event0 = new Event(EventType.TASK_FINISHED, new TaskFinished("worker", 0, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 0.0))), System.currentTimeMillis()); + Event event1 = new Event(EventType.TASK_FINISHED, new TaskFinished("worker", 1, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 1.0))), System.currentTimeMillis()); + Event event2 = new Event(EventType.TASK_FINISHED, new TaskFinished("ps", 0, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 0.0))), System.currentTimeMillis()); + + File eventFile = new File(appDir, + APPLICATION_ID + "-0-" + _endDate.getTime() + "-user1-SUCCEEDED." + Constants.HISTFILE_SUFFIX); + DatumWriter userDatumWriter = new SpecificDatumWriter<>(Event.class); + DataFileWriter dataFileWriter = new DataFileWriter<>(userDatumWriter); + dataFileWriter.create(event0.getSchema(), eventFile); + dataFileWriter.append(event0); + dataFileWriter.append(event1); + dataFileWriter.append(event2); + dataFileWriter.close(); + } + + private static void setupTestTonyConfDir() throws IOException { + Configuration testTonyConf = new Configuration(false); + testTonyConf.set(TonyConfigurationKeys.TONY_HISTORY_FINISHED, _finishedDir.getPath()); + + File confDir = Files.createTempDir(); + _tonyConfDir = confDir.getPath(); + File tonySiteFile = new File(confDir, Constants.TONY_SITE_CONF); + testTonyConf.writeXml(new FileOutputStream(tonySiteFile)); + } + + @Test + public void testFetchData() throws Exception { + FetcherConfigurationData configData = new FetcherConfigurationData(null, null, + ImmutableMap.of(Constants.TONY_CONF_DIR, _tonyConfDir)); + TonyFetcher tonyFetcher = new TonyFetcher(configData); + + AnalyticJob job = new AnalyticJob(); + ApplicationType tonyAppType = new ApplicationType(Constants.APP_TYPE); + job.setFinishTime(_endDate.getTime()); + job.setAppId(APPLICATION_ID); + job.setAppType(tonyAppType); + TonyApplicationData appData = tonyFetcher.fetchData(job); + + Assert.assertEquals(APPLICATION_ID, appData.getAppId()); + Assert.assertEquals(tonyAppType, appData.getApplicationType()); + Assert.assertEquals("bar", appData.getConf().get("foo")); + Map> metricsMap = appData.getTaskMap(); + Assert.assertEquals(2, metricsMap.size()); + Assert.assertEquals(2, metricsMap.get("worker").size()); + Assert.assertEquals(1, metricsMap.get("ps").size()); + } +} \ No newline at end of file diff --git a/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java b/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java new file mode 100644 index 000000000..14039ae5f --- /dev/null +++ b/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java @@ -0,0 +1,159 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.linkedin.drelephant.tony.heuristics; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.analysis.HeuristicResult; +import com.linkedin.drelephant.analysis.Severity; +import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import com.linkedin.tony.rpc.impl.TaskStatus; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + + +public class TaskMemoryHeuristicTest { + + /** + * 3g workers requested, max worker memory < 50% + */ + @Test + public void testCritical() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 1.2e9, + 1.1e9, + 1e9, + 1.3e9 + }, Constants.PS_JOB_NAME, new double[]{0.5e9}), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "3g", Constants.PS_JOB_NAME, "2g"), + Severity.CRITICAL + ); + } + + /** + * 3g ps requested, max ps memory < 60% + */ + @Test + public void testSevere() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 1.5e9, + 1.6e9, + }, Constants.PS_JOB_NAME, new double[]{1.84e9}), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "2g", Constants.PS_JOB_NAME, "3g"), + Severity.SEVERE + ); + } + + /** + * 3g workers requested, max worker memory < 70% + */ + @Test + public void testModerate() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 2.14e9, + 2e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "3g"), + Severity.MODERATE + ); + } + + /** + * 3g workers requested, max worker memory < 80% + */ + @Test + public void testLow() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 2e9, + 2.45e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "3g"), + Severity.LOW + ); + } + + /** + * 3g workers requested, max worker memory > 80% + */ + @Test + public void testNone() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 2.5e9, + 2.6e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "3g"), + Severity.NONE + ); + } + + /** + * Low memory utilization but default container size, so pass. + */ + @Test + public void testLowUtilizationDefaultContainerSize() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 0.5e9, + 0.6e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "2g"), + Severity.NONE + ); + } + + public void testHelper(Map memUsed, Map memRequested, Severity expectedSeverity) { + Configuration conf = new Configuration(false); + List events = new ArrayList<>(); + for (Map.Entry entry : memRequested.entrySet()) { + String taskType = entry.getKey(); + conf.set(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY), entry.getValue()); + conf.setInt(TonyConfigurationKeys.getInstancesKey(taskType), memUsed.get(taskType).length); + + for (int i = 0; i < memUsed.get(taskType).length; i++) { + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(taskType, i, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, memUsed.get(taskType)[i]))), + System.currentTimeMillis())); + } + } + + ApplicationType appType = new ApplicationType(Constants.APP_TYPE); + TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events); + + TaskMemoryHeuristic heuristic = new TaskMemoryHeuristic(new HeuristicConfigurationData("ignored", + "ignored", "ignored", appType, Collections.EMPTY_MAP)); + HeuristicResult result = heuristic.apply(data); + Assert.assertEquals(expectedSeverity, result.getSeverity()); + } +} diff --git a/test/resources/JobTypeConf.xml b/test/resources/JobTypeConf.xml index 8a4cae3eb..c7d002898 100644 --- a/test/resources/JobTypeConf.xml +++ b/test/resources/JobTypeConf.xml @@ -74,4 +74,10 @@ mapred.child.java.opts + + TonY + TONY + tony.application.name + +