diff --git a/.gitignore b/.gitignore
index f6b110e89..0688eb701 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,10 +17,12 @@
 # general
 *~
 *.log
+dr_elephant.log.*
 tmp
 dump
 .history
 /*.iml
+/dr-elephant-*/
 /out
 
 # Eclipse
@@ -58,3 +60,4 @@ public/assets/ember/
 public/assets/fonts/
 web/bower_components/
 web/node_modules/
+web/package-lock.json
diff --git a/.travis.yml b/.travis.yml
index 540cb8602..c1eafe0e7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,7 @@
 language: scala
 sudo: true
 jdk:
-  - oraclejdk8
-  - openjdk7
+  - openjdk8
 python: "2.6"
 install:
   - sudo pip install inspyred
@@ -11,10 +10,11 @@ install:
 script:
   - ./travis.sh
 
-# only build PRs and master (not all branch pushes)
+# only build PRs plus branches master and customSHSWork (not all branch pushes)
 branches:
   only:
     - master
+    - customSHSWork
 
 env:
   - PSO_DIR_PATH=$TRAVIS_BUILD_DIR/scripts/pso/
diff --git a/README.md b/README.md
index ee75e2233..15e117585 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Engineering Blog: [Click here](https://engineering.linkedin.com/blog/2016/04/dr-
 
 ## Mailing-list & Github Issues
 
-Google groups mailing list: [Click here](https://groups.google.com/forum/#!forum/dr-elephant-users) (Reached upper limit! please create github issues)
+~~Google groups mailing list: [Click here](https://groups.google.com/forum/#!forum/dr-elephant-users)~~ (Reached upper limit! please create github issues)
 
 Github issues: [click here](https://github.com/linkedin/dr-elephant/issues)
diff --git a/app-conf/AggregatorConf.xml b/app-conf/AggregatorConf.xml
index 1536b2d96..3fecc3300 100644
--- a/app-conf/AggregatorConf.xml
+++ b/app-conf/AggregatorConf.xml
@@ -44,4 +44,8 @@
       0.5
     </params>
   </aggregator>
+  <aggregator>
+    <applicationtype>tony</applicationtype>
+    <classname>com.linkedin.drelephant.tony.TonyMetricsAggregator</classname>
+  </aggregator>
 </configuration>
diff --git a/app-conf/FetcherConf.xml b/app-conf/FetcherConf.xml
index 72c65c987..1f5e49308 100644
--- a/app-conf/FetcherConf.xml
+++ b/app-conf/FetcherConf.xml
@@ -116,4 +116,13 @@
   -->
+
+  <fetcher>
+    <applicationtype>tony</applicationtype>
+    <classname>com.linkedin.drelephant.tony.fetchers.TonyFetcher</classname>
+  </fetcher>
 </configuration>
diff --git a/app-conf/HeuristicConf.xml b/app-conf/HeuristicConf.xml
index 59985ee1d..c14f85556 100644
--- a/app-conf/HeuristicConf.xml
+++ b/app-conf/HeuristicConf.xml
@@ -354,4 +354,26 @@
     <viewname>views.html.help.spark.helpExecutorGcHeuristic</viewname>
   </heuristic>
+
+  <heuristic>
+    <applicationtype>tony</applicationtype>
+    <heuristicname>Task Memory</heuristicname>
+    <classname>com.linkedin.drelephant.tony.heuristics.TaskMemoryHeuristic</classname>
+    <viewname>views.html.help.tony.helpTaskMemory</viewname>
+  </heuristic>
+
+  <heuristic>
+    <applicationtype>tony</applicationtype>
+    <heuristicname>Task GPU</heuristicname>
+    <classname>com.linkedin.drelephant.tony.heuristics.TaskGPUHeuristic</classname>
+    <viewname>views.html.help.tony.helpTaskGPU</viewname>
+  </heuristic>
+
 </configuration>
diff --git a/app-conf/JobTypeConf.xml b/app-conf/JobTypeConf.xml
index 90e20f113..693e39db8 100644
--- a/app-conf/JobTypeConf.xml
+++ b/app-conf/JobTypeConf.xml
@@ -86,4 +86,10 @@
     <conf>mapred.child.java.opts</conf>
   </jobType>
+  <jobType>
+    <name>TonY</name>
+    <type>TONY</type>
+    <conf>tony.application.name</conf>
+  </jobType>
 </configuration>
diff --git a/app-conf/elephant.conf b/app-conf/elephant.conf
index ccd157fa6..bc65ddf02 100644
--- a/app-conf/elephant.conf
+++ b/app-conf/elephant.conf
@@ -1,5 +1,11 @@
 # Play application server port
-port=8080
+http_port=8080
+
+# Un-comment these configs if you need to enable SSL for Dr. Elephant
+#https_port=8090
+#https_keystore_location=/path/to/keystore
+#https_keystore_type=JKS
+#https_keystore_password=password
 
 # Secret key
 # The secret key is used to secure cryptographics functions.
diff --git a/app/com/linkedin/drelephant/ElephantRunner.java b/app/com/linkedin/drelephant/ElephantRunner.java
index ea2162bac..55e6a8fe2 100644
--- a/app/com/linkedin/drelephant/ElephantRunner.java
+++ b/app/com/linkedin/drelephant/ElephantRunner.java
@@ -423,8 +423,7 @@ public void run() {
         logger.warn("Timed out while fetching data. Exception message is: " + e.getMessage());
         jobFate(finishTimeInfo, appType, applicableFinishTime, jobFinishTime);
       } catch (Exception e) {
-        logger.error(e.getMessage());
-        logger.error(ExceptionUtils.getStackTrace(e));
+        logger.error(String.format("Failed to analyze %s", analysisName), e);
         jobFate(finishTimeInfo, appType, applicableFinishTime, jobFinishTime);
       }
     }
diff --git a/app/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2.java b/app/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2.java
index 7c18bbecb..69489812f 100644
--- a/app/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2.java
+++ b/app/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2.java
@@ -16,6 +16,7 @@
 package com.linkedin.drelephant.analysis;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.linkedin.drelephant.ElephantContext;
 import com.linkedin.drelephant.math.Statistics;
 import controllers.MetricsController;
@@ -65,7 +66,7 @@ public class AnalyticJobGeneratorHadoop2 implements AnalyticJobGenerator {
 
   private final Queue<AnalyticJob> _firstRetryQueue = new ConcurrentLinkedQueue<AnalyticJob>();
 
-  private final ArrayList<AnalyticJob> _secondRetryQueue = new ArrayList<AnalyticJob>();
+  private final List<AnalyticJob> _secondRetryQueue = new LinkedList<AnalyticJob>();
 
   public void updateResourceManagerAddresses() {
     if (Boolean.valueOf(configuration.get(IS_RM_HA_ENABLED))) {
@@ -171,19 +172,28 @@ public List<AnalyticJob> fetchAnalyticJobs()
       appList.add(_firstRetryQueue.poll());
     }
 
-    Iterator iteratorSecondRetry = _secondRetryQueue.iterator();
-    while (iteratorSecondRetry.hasNext()) {
-      AnalyticJob job = (AnalyticJob) iteratorSecondRetry.next();
-      if(job.readyForSecondRetry()) {
-        appList.add(job);
-        iteratorSecondRetry.remove();
-      }
-    }
+    // Fetch jobs from the second retry queue which are ready for second retry and
+    // add them to the app list.
+    fetchJobsFromSecondRetryQueue(appList);
 
     _lastTime = _currentTime;
     return appList;
   }
 
+  @VisibleForTesting
+  void fetchJobsFromSecondRetryQueue(List<AnalyticJob> appList) {
+    synchronized (_secondRetryQueue) {
+      Iterator iteratorSecondRetry = _secondRetryQueue.iterator();
+      while (iteratorSecondRetry.hasNext()) {
+        AnalyticJob job = (AnalyticJob) iteratorSecondRetry.next();
+        if (job.readyForSecondRetry()) {
+          appList.add(job);
+          iteratorSecondRetry.remove();
+        }
+      }
+    }
+  }
+
   @Override
   public void addIntoRetries(AnalyticJob promise) {
     _firstRetryQueue.add(promise);
@@ -193,9 +203,12 @@ public void addIntoRetries(AnalyticJob promise) {
   }
 
   @Override
-  public void addIntoSecondRetryQueue(AnalyticJob promise) {
-    _secondRetryQueue.add(promise.setTimeToSecondRetry());
-    int secondRetryQueueSize = _secondRetryQueue.size();
+  public void addIntoSecondRetryQueue(AnalyticJob job) {
+    int secondRetryQueueSize;
+    synchronized (_secondRetryQueue) {
+      _secondRetryQueue.add(job.setTimeToSecondRetry());
+      secondRetryQueueSize = _secondRetryQueue.size();
+    }
     MetricsController.setSecondRetryQueueSize(secondRetryQueueSize);
    logger.info("Second Retry queue size is " + secondRetryQueueSize);
   }
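The `synchronized` blocks added above matter because `fetchJobsFromSecondRetryQueue` iterates the queue on one thread while `addIntoSecondRetryQueue` can append from another, and neither `ArrayList` nor `LinkedList` is thread-safe. A minimal standalone sketch (not part of this patch) of the failure mode the locking prevents:

```java
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

// Iterating a LinkedList while another thread mutates it typically fails fast
// with ConcurrentModificationException -- the race the patch guards against.
public class SecondRetryQueueRace {
  public static void main(String[] args) throws InterruptedException {
    List<Integer> queue = new LinkedList<>();
    for (int i = 0; i < 100_000; i++) {
      queue.add(i);
    }

    Thread writer = new Thread(() -> {
      for (int i = 0; i < 100_000; i++) {
        queue.add(i); // concurrent structural modification
      }
    });
    writer.start();

    try {
      Iterator<Integer> it = queue.iterator();
      while (it.hasNext()) {
        it.next(); // likely throws ConcurrentModificationException
      }
    } catch (java.util.ConcurrentModificationException e) {
      System.out.println("Iteration raced with a concurrent add: " + e);
    }
    writer.join();
  }
}
```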
diff --git a/app/com/linkedin/drelephant/analysis/HeuristicResult.java b/app/com/linkedin/drelephant/analysis/HeuristicResult.java
index b4519e982..ffb6511ea 100644
--- a/app/com/linkedin/drelephant/analysis/HeuristicResult.java
+++ b/app/com/linkedin/drelephant/analysis/HeuristicResult.java
@@ -40,7 +40,7 @@ public class HeuristicResult {
    * Heuristic Result Constructor
    *
    * @param heuristicClass The Heuristic class
-   * @param heuristicName The name of the Heursitic
+   * @param heuristicName The name of the Heuristic
    * @param severity The severity of the result
    * @param score The computed score
    */
@@ -73,7 +73,7 @@ public HeuristicResult(String heuristicClass, String heuristicName, Severity sev
   /**
    * Returns the heuristic analyser class name
    *
-   * @return the heursitic class name
+   * @return the heuristic class name
    */
   public String getHeuristicClassName() {
     return _heuristicClass;
diff --git a/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java b/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java
index 5668682be..463f362b6 100644
--- a/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java
+++ b/app/com/linkedin/drelephant/mapreduce/TaskLevelAggregatedMetrics.java
@@ -142,7 +142,7 @@ private void compute(MapReduceTaskData[] taskDatas, long containerSize, long ide
     }
 
     // wastedResources
-    long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER);  // give a 50% buffer
+    long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER); // give a 50% buffer
     if(wastedMemory > 0) {
       for (long duration : durations) {
         _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds
diff --git a/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala b/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala
index 23da7db28..ed61ad606 100644
--- a/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala
+++ b/app/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristic.scala
@@ -38,7 +38,7 @@ class ExecutorGcHeuristic(private val heuristicConfigurationData: HeuristicConfi
       .getOrElse(DEFAULT_GC_SEVERITY_A_THRESHOLDS)
 
   val gcSeverityDThresholds: SeverityThresholds =
-    SeverityThresholds.parse(heuristicConfigurationData.getParamMap.get(GC_SEVERITY_D_THRESHOLDS_KEY), ascending = true)
+    SeverityThresholds.parse(heuristicConfigurationData.getParamMap.get(GC_SEVERITY_D_THRESHOLDS_KEY), ascending = false)
       .getOrElse(DEFAULT_GC_SEVERITY_D_THRESHOLDS)
 
   override def getHeuristicConfData(): HeuristicConfigurationData = heuristicConfigurationData
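The one-character-looking change above is a real bug fix: the D thresholds flag executors that spend unusually *little* time in GC, so severity should rise as the observed ratio falls; parsing them as ascending inverted the scale. A hedged sketch of descending-threshold semantics (illustrative names, not Dr. Elephant's `SeverityThresholds` API):

```java
// Descending thresholds: severity increases as the observed value drops
// below each successive threshold, the opposite of the ascending case.
public class DescendingThresholds {
  static int severityDescending(double value, double low, double moderate, double severe, double critical) {
    if (value <= critical) return 4;
    if (value <= severe) return 3;
    if (value <= moderate) return 2;
    if (value <= low) return 1;
    return 0;
  }

  public static void main(String[] args) {
    // e.g. GC-time-to-run-time ratios: smaller ratios trip higher severities
    System.out.println(severityDescending(0.05, 0.04, 0.03, 0.02, 0.01));  // 0 (fine)
    System.out.println(severityDescending(0.005, 0.04, 0.03, 0.02, 0.01)); // 4 (critical)
  }
}
```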
diff --git a/app/com/linkedin/drelephant/tez/TezMetricsAggregator.java b/app/com/linkedin/drelephant/tez/TezMetricsAggregator.java
index c04edd7fd..1330fab6c 100644
--- a/app/com/linkedin/drelephant/tez/TezMetricsAggregator.java
+++ b/app/com/linkedin/drelephant/tez/TezMetricsAggregator.java
@@ -85,11 +85,9 @@ public HadoopAggregatedData getResult() {
 
   private long getMapContainerSize(HadoopApplicationData data) {
     try {
-      long mapContainerSize = Long.parseLong(data.getConf().getProperty(TEZ_CONTAINER_CONFIG));
-      if (mapContainerSize > 0)
-        return mapContainerSize;
-      else
-        return Long.parseLong(data.getConf().getProperty(MAP_CONTAINER_CONFIG));
+      // Try to get the container size from the Tez config; if not found, fall back to the MapReduce config
+      long mapContainerSize = data.getConf().containsKey(TEZ_CONTAINER_CONFIG) ? Long.parseLong(data.getConf().getProperty(TEZ_CONTAINER_CONFIG)) : -1;
+      return mapContainerSize > 0 ? mapContainerSize : Long.parseLong(data.getConf().getProperty(MAP_CONTAINER_CONFIG));
     } catch ( NumberFormatException ex) {
       return CONTAINER_MEMORY_DEFAULT_BYTES;
     }
@@ -97,11 +95,9 @@ private long getMapContainerSize(HadoopApplicationData data) {
 
   private long getReducerContainerSize(HadoopApplicationData data) {
     try {
-      long reducerContainerSize = Long.parseLong(data.getConf().getProperty(TEZ_CONTAINER_CONFIG));
-      if (reducerContainerSize > 0)
-        return reducerContainerSize;
-      else
-        return Long.parseLong(data.getConf().getProperty(REDUCER_CONTAINER_CONFIG));
+      // Try to get the container size from the Tez config; if not found, fall back to the MapReduce config
+      long reducerContainerSize = data.getConf().containsKey(TEZ_CONTAINER_CONFIG) ? Long.parseLong(data.getConf().getProperty(TEZ_CONTAINER_CONFIG)) : -1;
+      return reducerContainerSize > 0 ? reducerContainerSize : Long.parseLong(data.getConf().getProperty(REDUCER_CONTAINER_CONFIG));
     } catch ( NumberFormatException ex) {
       return CONTAINER_MEMORY_DEFAULT_BYTES;
     }
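The `containsKey` guard matters because the old code always parsed the Tez property: when it was absent, `Long.parseLong(null)` threw `NumberFormatException`, which the catch swallowed into the hard-coded default, so the MapReduce fallback was unreachable. A hedged, self-contained sketch of the pattern (property names and the default are illustrative, not the project's constants):

```java
import java.util.Properties;

// With the guard, a missing Tez key yields -1 and control reaches the
// MapReduce setting instead of short-circuiting to the default via the catch.
public class ContainerSizeFallback {
  static final long DEFAULT_BYTES = 2147483648L; // stand-in for CONTAINER_MEMORY_DEFAULT_BYTES

  static long containerSize(Properties conf, String tezKey, String mrKey) {
    try {
      long tezSize = conf.containsKey(tezKey) ? Long.parseLong(conf.getProperty(tezKey)) : -1;
      return tezSize > 0 ? tezSize : Long.parseLong(conf.getProperty(mrKey));
    } catch (NumberFormatException ex) {
      return DEFAULT_BYTES;
    }
  }

  public static void main(String[] args) {
    Properties conf = new Properties();
    conf.setProperty("mapreduce.map.memory.mb", "1024");
    // Tez key absent: the new logic reaches the MapReduce value (1024).
    System.out.println(containerSize(conf, "hive.tez.container.size", "mapreduce.map.memory.mb"));
  }
}
```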
diff --git a/app/com/linkedin/drelephant/tez/data/TezCounterData.java b/app/com/linkedin/drelephant/tez/data/TezCounterData.java
index 9aac00117..4571aaad7 100644
--- a/app/com/linkedin/drelephant/tez/data/TezCounterData.java
+++ b/app/com/linkedin/drelephant/tez/data/TezCounterData.java
@@ -100,11 +100,16 @@ public static enum CounterName {
   HDFS_READ_OPS(GroupName.FileSystemCounters, "HDFS_READ_OPS", "HDFS_READ_OPS"),
   HDFS_LARGE_READ_OPS(GroupName.FileSystemCounters, "HDFS_LARGE_READ_OPS", "HDFS_LARGE_READ_OPS"),
   HDFS_WRITE_OPS(GroupName.FileSystemCounters, "HDFS_WRITE_OPS", "HDFS_WRITE_OPS"),
+  S3_BYTES_READ(GroupName.FileSystemCounters, "S3_BYTES_READ", "S3_BYTES_READ"),
+  S3_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3_BYTES_WRITTEN", "S3_BYTES_WRITTEN"),
+  S3_READ_OPS(GroupName.FileSystemCounters, "S3_READ_OPS", "S3_READ_OPS"),
+  S3_LARGE_READ_OPS(GroupName.FileSystemCounters, "S3_LARGE_READ_OPS", "S3_LARGE_READ_OPS"),
+  S3_WRITE_OPS(GroupName.FileSystemCounters, "S3_WRITE_OPS", "S3_WRITE_OPS"),
   S3A_BYTES_READ(GroupName.FileSystemCounters, "S3A_BYTES_READ", "S3A_BYTES_READ"),
   S3A_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3A_BYTES_WRITTEN", "S3A_BYTES_WRITTEN"),
   S3A_READ_OPS(GroupName.FileSystemCounters, "S3A_READ_OPS", "S3A_READ_OPS"),
   S3A_LARGE_READ_OPS(GroupName.FileSystemCounters, "S3A_LARGE_READ_OPS", "S3A_LARGE_READ_OPS"),
-  S3A_WRITE_OPS(GroupName.FileSystemCounters, "S3A_WRITE_OPS", "S3_WRITE_OPS"),
+  S3A_WRITE_OPS(GroupName.FileSystemCounters, "S3A_WRITE_OPS", "S3A_WRITE_OPS"),
   S3N_BYTES_READ(GroupName.FileSystemCounters, "S3N_BYTES_READ", "S3N_BYTES_READ"),
   S3N_BYTES_WRITTEN(GroupName.FileSystemCounters, "S3N_BYTES_WRITTEN", "S3N_BYTES_WRITTEN"),
   S3N_READ_OPS(GroupName.FileSystemCounters, "S3N_READ_OPS", "S3N_READ_OPS"),
diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java
index b17ebe3b2..1e5395313 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/MapperDataSkewHeuristic.java
@@ -35,6 +35,7 @@ public class MapperDataSkewHeuristic extends GenericDataSkewHeuristic {
   public MapperDataSkewHeuristic(HeuristicConfigurationData heuristicConfData) {
     super(Arrays.asList(
         TezCounterData.CounterName.HDFS_BYTES_READ,
+        TezCounterData.CounterName.S3_BYTES_READ,
         TezCounterData.CounterName.S3A_BYTES_READ,
         TezCounterData.CounterName.S3N_BYTES_READ
     ), heuristicConfData);
diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java
index 1694e32ea..07c3ee051 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/MapperSpeedHeuristic.java
@@ -53,6 +53,7 @@ public class MapperSpeedHeuristic implements Heuristic {
   private List<TezCounterData.CounterName> _counterNames = Arrays.asList(
       TezCounterData.CounterName.HDFS_BYTES_READ,
+      TezCounterData.CounterName.S3_BYTES_READ,
       TezCounterData.CounterName.S3A_BYTES_READ,
       TezCounterData.CounterName.S3N_BYTES_READ
   );
diff --git a/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java
index 838bd7a59..bbf0d0639 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/MapperTimeHeuristic.java
@@ -51,6 +51,7 @@ public class MapperTimeHeuristic implements Heuristic {
   private List<TezCounterData.CounterName> _counterNames = Arrays.asList(
       TezCounterData.CounterName.HDFS_BYTES_READ,
+      TezCounterData.CounterName.S3_BYTES_READ,
       TezCounterData.CounterName.S3A_BYTES_READ,
       TezCounterData.CounterName.S3N_BYTES_READ
   );
diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java
index 3486328d7..3d125b417 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeDataSkewHeuristic.java
@@ -35,6 +35,7 @@ public class TezScopeDataSkewHeuristic extends GenericDataSkewHeuristic {
   public TezScopeDataSkewHeuristic(HeuristicConfigurationData heuristicConfData) {
     super(Arrays.asList(
         TezCounterData.CounterName.HDFS_BYTES_READ,
+        TezCounterData.CounterName.S3_BYTES_READ,
         TezCounterData.CounterName.S3A_BYTES_READ,
         TezCounterData.CounterName.S3N_BYTES_READ
     ), heuristicConfData);
diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java
index 2ed0e833c..90b22e5cd 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeSpeedHeuristic.java
@@ -53,6 +53,7 @@ public class TezScopeSpeedHeuristic implements Heuristic {
   private List<TezCounterData.CounterName> _counterNames = Arrays.asList(
       TezCounterData.CounterName.HDFS_BYTES_READ,
+      TezCounterData.CounterName.S3_BYTES_READ,
       TezCounterData.CounterName.S3A_BYTES_READ,
       TezCounterData.CounterName.S3N_BYTES_READ
   );
diff --git a/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java b/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java
index 0a6f16283..c89bd666a 100644
--- a/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java
+++ b/app/com/linkedin/drelephant/tez/heuristics/TezScopeTimeHeuristic.java
@@ -51,6 +51,7 @@ public class TezScopeTimeHeuristic implements Heuristic {
   private List<TezCounterData.CounterName> _counterNames = Arrays.asList(
       TezCounterData.CounterName.HDFS_BYTES_READ,
+      TezCounterData.CounterName.S3_BYTES_READ,
       TezCounterData.CounterName.S3A_BYTES_READ,
       TezCounterData.CounterName.S3N_BYTES_READ
   );
diff --git a/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java b/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java
new file mode 100644
index 000000000..540e54d22
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/TonyMetricsAggregator.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.linkedin.drelephant.analysis.HadoopAggregatedData;
+import com.linkedin.drelephant.analysis.HadoopApplicationData;
+import com.linkedin.drelephant.analysis.HadoopMetricsAggregator;
+import com.linkedin.drelephant.configurations.aggregator.AggregatorConfigurationData;
+import com.linkedin.drelephant.math.Statistics;
+import com.linkedin.drelephant.tony.data.TonyApplicationData;
+import com.linkedin.drelephant.tony.data.TonyTaskData;
+import com.linkedin.drelephant.tony.util.TonyUtils;
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import java.util.Map;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+
+
+public class TonyMetricsAggregator implements HadoopMetricsAggregator {
+  @VisibleForTesting
+  static final double MEMORY_BUFFER = 1.5;
+
+  private HadoopAggregatedData _hadoopAggregatedData;
+
+  /**
+   * Creates a new {@code TonyMetricsAggregator}.
+   * @param unused Dr. Elephant expects a constructor of this form but {@code TonyMetricsAggregator} does not need this
+   */
+  public TonyMetricsAggregator(AggregatorConfigurationData unused) { }
+
+  @Override
+  public void aggregate(HadoopApplicationData data) {
+    _hadoopAggregatedData = new HadoopAggregatedData();
+
+    TonyApplicationData tonyData = (TonyApplicationData) data;
+    Configuration tonyConf = tonyData.getConfiguration();
+
+    long mbSecUsed = 0;
+    long mbSecWasted = 0;
+
+    Map<String, Map<Integer, TonyTaskData>> taskMap = tonyData.getTaskMap();
+    for (Map.Entry<String, Map<Integer, TonyTaskData>> entry : taskMap.entrySet()) {
+      String taskType = entry.getKey();
+
+      String memoryString = tonyConf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY));
+      String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString);
+      long mbRequested = Long.parseLong(memoryStringMB);
+      double maxMemoryMBUsed = TonyUtils.getMaxMetricForTaskTypeAndMetricName(taskMap, taskType,
+          Constants.MAX_MEMORY_BYTES) / FileUtils.ONE_MB;
+
+      for (TonyTaskData taskData : entry.getValue().values()) {
+        long taskDurationSec = (taskData.getTaskEndTime() - taskData.getTaskStartTime()) / Statistics.SECOND_IN_MS;
+        if (taskDurationSec < 0) {
+          // Most likely TASK_FINISHED and APPLICATION_FINISHED events are missing for the task.
+          continue;
+        }
+        mbSecUsed += mbRequested * taskDurationSec;
+
+        if (maxMemoryMBUsed == 0) {
+          // If we don't have max memory metrics, don't calculate wasted memory.
+          continue;
+        }
+        long wastedMemory = (long) (mbRequested - maxMemoryMBUsed * MEMORY_BUFFER);
+        if (wastedMemory > 0) {
+          mbSecWasted += wastedMemory * taskDurationSec;
+        }
+      }
+    }
+
+    _hadoopAggregatedData.setResourceUsed(mbSecUsed);
+    _hadoopAggregatedData.setResourceWasted(mbSecWasted);
+    // TODO: Calculate and set delay
+  }
+
+  @Override
+  public HadoopAggregatedData getResult() {
+    return _hadoopAggregatedData;
+  }
+}
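The aggregation above charges each task its full memory request for every second it ran, and counts as waste whatever the request exceeds 1.5x the peak usage. A worked example with made-up numbers (not from any real application):

```java
// One worker requested 8192 MB, peaked at 2048 MB, and ran for 600 seconds.
public class TonyAggregationExample {
  public static void main(String[] args) {
    long mbRequested = 8192;
    double maxMemoryMBUsed = 2048;
    long taskDurationSec = 600;
    double memoryBuffer = 1.5; // mirrors TonyMetricsAggregator.MEMORY_BUFFER

    long mbSecUsed = mbRequested * taskDurationSec;                            // 4,915,200 MB-seconds
    long wastedMemory = (long) (mbRequested - maxMemoryMBUsed * memoryBuffer); // 8192 - 3072 = 5120 MB
    long mbSecWasted = wastedMemory > 0 ? wastedMemory * taskDurationSec : 0;  // 3,072,000 MB-seconds

    System.out.println("used=" + mbSecUsed + " wasted=" + mbSecWasted);
  }
}
```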
diff --git a/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java b/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java
new file mode 100644
index 000000000..7021da0a5
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/data/TonyApplicationData.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.data;
+
+import com.linkedin.drelephant.analysis.ApplicationType;
+import com.linkedin.drelephant.analysis.HadoopApplicationData;
+import com.linkedin.tony.events.Event;
+import com.linkedin.tony.events.EventType;
+import com.linkedin.tony.events.TaskFinished;
+import com.linkedin.tony.events.TaskStarted;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import org.apache.hadoop.conf.Configuration;
+
+
+public class TonyApplicationData implements HadoopApplicationData {
+  private String _appId;
+  private ApplicationType _appType;
+  private Configuration _configuration;
+  private Properties _props;
+  private Map<String, Map<Integer, TonyTaskData>> _taskMap;
+
+  /**
+   * Constructor for {@code TonyApplicationData}.
+   * @param appId application id
+   * @param appType application type (should be TONY)
+   * @param configuration the configuration for this application
+   * @param events the events emitted by this application
+   */
+  public TonyApplicationData(String appId, ApplicationType appType, Configuration configuration, List<Event> events) {
+    _appId = appId;
+    _appType = appType;
+
+    _configuration = configuration;
+    _props = new Properties();
+    for (Map.Entry<String, String> entry : configuration) {
+      _props.setProperty(entry.getKey(), entry.getValue());
+    }
+
+    _taskMap = new HashMap<>();
+    processEvents(events);
+  }
+
+  @Override
+  public String getAppId() {
+    return _appId;
+  }
+
+  /**
+   * Returns the {@link Configuration} for this application.
+   * @return the configuration for this application
+   */
+  public Configuration getConfiguration() {
+    return _configuration;
+  }
+
+  @Override
+  public Properties getConf() {
+    return _props;
+  }
+
+  @Override
+  public ApplicationType getApplicationType() {
+    return _appType;
+  }
+
+  /**
+   * Returns a map of task data.
+   * @return a map from task type to a map of task index to task data
+   */
+  public Map<String, Map<Integer, TonyTaskData>> getTaskMap() {
+    return _taskMap;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return false;
+  }
+
+  private void initTaskMap(String taskType, int taskIndex) {
+    if (!_taskMap.containsKey(taskType)) {
+      _taskMap.put(taskType, new HashMap<>());
+    }
+
+    if (!_taskMap.get(taskType).containsKey(taskIndex)) {
+      _taskMap.get(taskType).put(taskIndex, new TonyTaskData(taskType, taskIndex));
+    }
+  }
+
+  private void processEvents(List<Event> events) {
+    long appFinishedTime = 0;
+    for (Event event : events) {
+      if (event.getType().equals(EventType.TASK_STARTED)) {
+        TaskStarted taskStartedEvent = (TaskStarted) event.getEvent();
+        String taskType = taskStartedEvent.getTaskType();
+        int taskIndex = taskStartedEvent.getTaskIndex();
+        initTaskMap(taskType, taskIndex);
+        _taskMap.get(taskType).get(taskIndex).setTaskStartTime(event.getTimestamp());
+      } else if (event.getType().equals(EventType.TASK_FINISHED)) {
+        TaskFinished taskFinishedEvent = (TaskFinished) event.getEvent();
+        String taskType = taskFinishedEvent.getTaskType();
+        int taskIndex = taskFinishedEvent.getTaskIndex();
+        initTaskMap(taskType, taskIndex);
+        _taskMap.get(taskType).get(taskIndex).setTaskEndTime(event.getTimestamp());
+        _taskMap.get(taskType).get(taskIndex).setMetrics(taskFinishedEvent.getMetrics());
+      } else if (event.getType().equals(EventType.APPLICATION_FINISHED)) {
+        appFinishedTime = event.getTimestamp();
+      }
+    }
+
+    // Set end time for any tasks that don't have end times to application finish time
+    if (appFinishedTime > 0) {
+      for (Map<Integer, TonyTaskData> taskDataMap : _taskMap.values()) {
+        for (TonyTaskData taskData : taskDataMap.values()) {
+          if (taskData.getTaskEndTime() == 0) {
+            taskData.setTaskEndTime(appFinishedTime);
+          }
+        }
+      }
+    }
+  }
+}
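The task map built from the event stream is keyed twice: first by task type (e.g. `worker`, `ps`), then by task index. A hedged usage sketch of that shape, with plain placeholder values instead of `TonyTaskData` so it stays self-contained:

```java
import java.util.HashMap;
import java.util.Map;

// Task type -> task index -> task data, as exposed by getTaskMap().
public class TaskMapShape {
  public static void main(String[] args) {
    Map<String, Map<Integer, String>> taskMap = new HashMap<>();
    taskMap.computeIfAbsent("worker", k -> new HashMap<>()).put(0, "worker:0 data");
    taskMap.computeIfAbsent("worker", k -> new HashMap<>()).put(1, "worker:1 data");
    taskMap.computeIfAbsent("ps", k -> new HashMap<>()).put(0, "ps:0 data");

    taskMap.forEach((type, byIndex) ->
        byIndex.forEach((index, data) ->
            System.out.println(type + ":" + index + " -> " + data)));
  }
}
```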
diff --git a/app/com/linkedin/drelephant/tony/data/TonyTaskData.java b/app/com/linkedin/drelephant/tony/data/TonyTaskData.java
new file mode 100644
index 000000000..40066f325
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/data/TonyTaskData.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.data;
+
+import com.linkedin.tony.events.Metric;
+import java.util.List;
+
+
+public class TonyTaskData {
+  private final String _taskType;
+  private final int _taskIndex;
+
+  private long _taskStartTime;
+  private long _taskEndTime;
+  private List<Metric> _metrics;
+
+  /**
+   * Creates a new {@code TonyTaskData} encapsulating a task's data, such as task type, index, start time,
+   * end time, and metrics.
+   * @param taskType the task type
+   * @param taskIndex the task index
+   */
+  public TonyTaskData(String taskType, int taskIndex) {
+    _taskType = taskType;
+    _taskIndex = taskIndex;
+  }
+
+  /**
+   * Get task start time.
+   * @return the task start time
+   */
+  public long getTaskStartTime() {
+    return _taskStartTime;
+  }
+
+  /**
+   * Set task start time.
+   * @param taskStartTime the task start time
+   */
+  public void setTaskStartTime(long taskStartTime) {
+    _taskStartTime = taskStartTime;
+  }
+
+  /**
+   * Get task end time.
+   * @return the task end time
+   */
+  public long getTaskEndTime() {
+    return _taskEndTime;
+  }
+
+  /**
+   * Set task end time.
+   * @param taskEndTime the end time
+   */
+  public void setTaskEndTime(long taskEndTime) {
+    _taskEndTime = taskEndTime;
+  }
+
+  /**
+   * Get the metrics for this task.
+   * @return the metrics
+   */
+  public List<Metric> getMetrics() {
+    return _metrics;
+  }
+
+  /**
+   * Sets the metrics for this task.
+   * @param metrics the metrics
+   */
+  public void setMetrics(List<Metric> metrics) {
+    _metrics = metrics;
+  }
+}
diff --git a/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java b/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java
new file mode 100644
index 000000000..82e271bb0
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/fetchers/TonyFetcher.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.fetchers;
+
+import com.linkedin.drelephant.analysis.AnalyticJob;
+import com.linkedin.drelephant.analysis.ElephantFetcher;
+import com.linkedin.drelephant.configurations.fetcher.FetcherConfigurationData;
+import com.linkedin.drelephant.tony.data.TonyApplicationData;
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import com.linkedin.tony.events.Event;
+import com.linkedin.tony.util.ParserUtils;
+import java.io.IOException;
+import java.util.Date;
+import java.util.List;
+import java.time.ZoneId;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
+
+
+public class TonyFetcher implements ElephantFetcher<TonyApplicationData> {
+  private static final Logger _LOGGER = Logger.getLogger(TonyFetcher.class);
+  private final Path _intermediateDir;
+  private final Path _finishedDir;
+  private String _finishedDirTimezone;
+  private final FileSystem _fs;
+  private ZoneId _zoneId;
+
+  /**
+   * Constructor for {@link TonyFetcher}.
+   * @param fetcherConfig the fetcher configuration
+   * @throws IOException
+   */
+  public TonyFetcher(FetcherConfigurationData fetcherConfig) throws IOException {
+    Configuration conf = new Configuration();
+
+    String tonyConfDir = System.getenv(Constants.TONY_CONF_DIR);
+    if (fetcherConfig.getParamMap().containsKey(Constants.TONY_CONF_DIR)) {
+      tonyConfDir = fetcherConfig.getParamMap().get(Constants.TONY_CONF_DIR);
+    }
+    _LOGGER.info("Using TonY conf dir: " + tonyConfDir);
+
+    conf.addResource(new Path(tonyConfDir + Path.SEPARATOR + Constants.TONY_SITE_CONF));
+    _intermediateDir = new Path(conf.get(TonyConfigurationKeys.TONY_HISTORY_INTERMEDIATE));
+    _finishedDir = new Path(conf.get(TonyConfigurationKeys.TONY_HISTORY_FINISHED));
+    _fs = _finishedDir.getFileSystem(conf);
+
+    _finishedDirTimezone = conf.get(TonyConfigurationKeys.TONY_HISTORY_FINISHED_DIR_TIMEZONE);
+    if (_finishedDirTimezone == null) {
+      _finishedDirTimezone = TonyConfigurationKeys.DEFAULT_TONY_HISTORY_FINISHED_DIR_TIMEZONE;
+    }
+    _zoneId = ZoneId.of(_finishedDirTimezone);
+    _LOGGER.info("Using ZoneID: " + _zoneId.getId());
+  }
+
+  @Override
+  public TonyApplicationData fetchData(AnalyticJob job) throws Exception {
+    _LOGGER.debug("Fetching data for job " + job.getAppId());
+    long finishTime = job.getFinishTime();
+    Date date = new Date(finishTime);
+
+    // TODO: We are deriving yyyy/MM/dd from the RM's application finish time, but the TonY Portal actually creates the
+    // yyyy/MM/dd directory based off the end time embedded in the jhist file name. This end time is taken before the
+    // application finishes and thus is slightly before the RM's application finish time. So it's possible that the
+    // yyyy/MM/dd derived from the RM's application finish time is a day later and we may not find the history files.
+    // In case we don't find the history files in yyyy/MM/dd, we should check the previous day as well.
+    String yearMonthDay = ParserUtils.getYearMonthDayDirectory(date, _zoneId);
+    Path jobDir = new Path(_finishedDir, yearMonthDay + Path.SEPARATOR + job.getAppId());
+    if (!_fs.exists(jobDir)) {
+      // check intermediate dir
+      jobDir = new Path(_intermediateDir, job.getAppId());
+    }
+
+    _LOGGER.debug("Job directory for " + job.getAppId() + ": " + jobDir);
+
+    // parse config
+    Path confFile = new Path(jobDir, Constants.TONY_FINAL_XML);
+    if (!_fs.exists(confFile)) {
+      // for backward compatibility, see https://github.com/linkedin/TonY/issues/271
+      confFile = new Path(jobDir, "config.xml");
+    }
+    Configuration conf = new Configuration(false);
+    if (_fs.exists(confFile)) {
+      conf.addResource(_fs.open(confFile));
+    }
+
+    // Parse events. For a list of event types, see
+    // https://github.com/linkedin/TonY/blob/master/tony-core/src/main/avro/EventType.avsc.
+    // We get the task start time from the TASK_STARTED event and the finish time and metrics from the TASK_FINISHED
+    // event.
+    List<Event> events = ParserUtils.parseEvents(_fs, jobDir);
+
+    return new TonyApplicationData(job.getAppId(), job.getAppType(), conf, events);
+  }
+}
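The TODO above notes that the jhist-derived `yyyy/MM/dd` directory can land one day before the date derived from the RM finish time across a midnight boundary. A hedged sketch of the previous-day fallback it suggests (helper names are hypothetical, not TonY's `ParserUtils` API):

```java
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;

// If appId isn't found under the yyyy/MM/dd directory for the RM finish time,
// probe the prior day's directory as well before giving up.
public class FinishedDirProbe {
  private static final DateTimeFormatter FMT = DateTimeFormatter.ofPattern("yyyy/MM/dd");

  static String[] candidateDayDirs(long finishTimeMillis, ZoneId zone) {
    LocalDate day = Instant.ofEpochMilli(finishTimeMillis).atZone(zone).toLocalDate();
    return new String[] { day.format(FMT), day.minusDays(1).format(FMT) };
  }

  public static void main(String[] args) {
    for (String dir : candidateDayDirs(System.currentTimeMillis(), ZoneId.of("America/Los_Angeles"))) {
      System.out.println("would check: /finished/" + dir + "/<appId>");
    }
  }
}
```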
diff --git a/app/com/linkedin/drelephant/tony/heuristics/TaskGPUHeuristic.java b/app/com/linkedin/drelephant/tony/heuristics/TaskGPUHeuristic.java
new file mode 100644
index 000000000..c1135ea23
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/heuristics/TaskGPUHeuristic.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.heuristics;
+
+import com.linkedin.drelephant.analysis.Heuristic;
+import com.linkedin.drelephant.analysis.HeuristicResult;
+import com.linkedin.drelephant.analysis.HeuristicResultDetails;
+import com.linkedin.drelephant.analysis.Severity;
+import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData;
+import com.linkedin.drelephant.tony.data.TonyApplicationData;
+import com.linkedin.drelephant.tony.data.TonyTaskData;
+import com.linkedin.drelephant.tony.util.TonyUtils;
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Logger;
+
+
+/**
+ * For each type of task (e.g.: worker, ps), this heuristic checks the GPU utilization across all tasks of that type
+ * and compares against some thresholds. The final severity is the highest severity across all task types.
+ */
+public class TaskGPUHeuristic implements Heuristic<TonyApplicationData> {
+  private static final Logger _LOGGER = Logger.getLogger(TaskGPUHeuristic.class);
+  private static final String[] MAX_METRICS_TO_APPLY = {
+      Constants.MAX_GPU_UTILIZATION,
+      Constants.MAX_GPU_FB_MEMORY_USAGE,
+      Constants.MAX_GPU_MAIN_MEMORY_USAGE};
+  private static final String[] AVG_METRICS_TO_APPLY = {
+      Constants.AVG_GPU_UTILIZATION,
+      Constants.AVG_GPU_FB_MEMORY_USAGE,
+      Constants.AVG_GPU_MAIN_MEMORY_USAGE
+  };
+
+  private HeuristicConfigurationData _heuristicConfData;
+
+  /**
+   * Constructor for {@link TaskGPUHeuristic}.
+   * @param heuristicConfData the configuration for this heuristic
+   */
+  public TaskGPUHeuristic(HeuristicConfigurationData heuristicConfData) {
+    this._heuristicConfData = heuristicConfData;
+  }
+
+  @Override
+  public HeuristicResult apply(TonyApplicationData data) {
+    _LOGGER.debug("Applying TaskGPUHeuristic");
+    Map<String, Map<Integer, TonyTaskData>> taskMap = data.getTaskMap();
+    Configuration conf = data.getConfiguration();
+
+    Set<String> taskTypes = com.linkedin.tony.util.Utils.getAllJobTypes(conf);
+    Severity finalSeverity = Severity.NONE;
+    List<HeuristicResultDetails> details = new ArrayList<>();
+
+    for (String taskType : taskTypes) {
+      details.add(new HeuristicResultDetails("Number of " + taskType + " tasks",
+          Integer.toString(taskMap.get(taskType).size())));
+
+      // get number of GPU resources requested, if any
+      int numGPUsRequested = conf.getInt(TonyConfigurationKeys.getResourceKey(taskType, Constants.GPUS), 0);
+      if (numGPUsRequested > 0) {
+        details.add(new HeuristicResultDetails("Number of GPUs requested per " + taskType + " task",
+            Integer.toString(numGPUsRequested)));
+      }
+
+      // get global max gpu utilization metrics
+      for (String maxMetricToApply : MAX_METRICS_TO_APPLY) {
+        double maxMetric = TonyUtils.getMaxMetricForTaskTypeAndMetricName(taskMap, taskType, maxMetricToApply);
+
+        if (maxMetric <= 0 || Double.isNaN(maxMetric)) {
+          details.add(new HeuristicResultDetails(maxMetricToApply + " in any " + taskType + " task", "Unknown"));
+          continue;
+        }
+        details.add(new HeuristicResultDetails(maxMetricToApply + " in any " + taskType + " task",
+            String.format("%.2f", maxMetric) + "%"));
+      }
+
+      // get global average gpu utilization metrics
+      for (String avgMetricToApply : AVG_METRICS_TO_APPLY) {
+        double avgMetric = TonyUtils.getAvgMetricForTaskTypeAndMetricName(taskMap, taskType, avgMetricToApply);
+
+        if (avgMetric <= 0 || Double.isNaN(avgMetric)) {
+          details.add(new HeuristicResultDetails(avgMetricToApply + " in any " + taskType + " task", "Unknown"));
+          continue;
+        }
+        details.add(new HeuristicResultDetails(avgMetricToApply + " in any " + taskType + " task",
+            String.format("%.2f", avgMetric) + "%"));
+      }
+
+      // compare to threshold and update severity
+      // TODO: pass all metrics collected as not severe for now; update the heuristic once enough data has been
+      // collected to determine appropriate thresholds.
+    }
+
+    return new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), finalSeverity,
+        0, details);
+  }
+
+  @Override
+  public HeuristicConfigurationData getHeuristicConfData() {
+    return _heuristicConfData;
+  }
+}
diff --git a/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java b/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java
new file mode 100644
index 000000000..ecab015b6
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristic.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.heuristics;
+
+import com.linkedin.drelephant.analysis.Heuristic;
+import com.linkedin.drelephant.analysis.HeuristicResult;
+import com.linkedin.drelephant.analysis.HeuristicResultDetails;
+import com.linkedin.drelephant.analysis.Severity;
+import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData;
+import com.linkedin.drelephant.tony.data.TonyApplicationData;
+import com.linkedin.drelephant.tony.data.TonyTaskData;
+import com.linkedin.drelephant.tony.util.TonyUtils;
+import com.linkedin.drelephant.util.Utils;
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.TonyConfigurationKeys;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.log4j.Logger;
+
+
+/**
+ * For each type of task (e.g.: worker, ps), this heuristic checks the max memory used across all tasks of that type
+ * and compares against some thresholds. The final severity is the highest severity across all task types.
+ */
+public class TaskMemoryHeuristic implements Heuristic<TonyApplicationData> {
+  private static final Logger _LOGGER = Logger.getLogger(TaskMemoryHeuristic.class);
+  private static final int DEFAULT_CONTAINER_MEMORY_MB = 2048;
+  private static final String CONTAINER_MEMORY_DEFAULT_MB_CONF = "container_memory_default_mb";
+  private static final String TASK_MEMORY_THRESHOLDS_CONF = "task_memory_thresholds";
+
+  private HeuristicConfigurationData _heuristicConfData;
+  private long defaultContainerMemoryBytes = DEFAULT_CONTAINER_MEMORY_MB * FileUtils.ONE_MB;
+
+  // Initialized to default max memory thresholds
+  private double[] maxMemoryLimits = {0.8, 0.7, 0.6, 0.5};
+
+  // If the requested memory is within this amount of the max memory usage, automatic pass.
+  // This is to prevent Dr. Elephant flagging container sizes of 3 GB when max memory usage is 2 GB.
+  private long graceMemoryHeadroomBytes;
+
+  /**
+   * Constructor for {@link TaskMemoryHeuristic}.
+   * @param heuristicConfData the configuration for this heuristic
+   */
+  public TaskMemoryHeuristic(HeuristicConfigurationData heuristicConfData) {
+    this._heuristicConfData = heuristicConfData;
+
+    Map<String, String> params = heuristicConfData.getParamMap();
+    // read default container size
+    if (params.containsKey(CONTAINER_MEMORY_DEFAULT_MB_CONF)) {
+      defaultContainerMemoryBytes = Long.parseLong(params.get(CONTAINER_MEMORY_DEFAULT_MB_CONF)) * FileUtils.ONE_MB;
+    }
+    // read max memory thresholds
+    if (params.containsKey(TASK_MEMORY_THRESHOLDS_CONF)) {
+      maxMemoryLimits = Utils.getParam(params.get(TASK_MEMORY_THRESHOLDS_CONF), maxMemoryLimits.length);
+    }
+
+    Configuration yarnConf = new YarnConfiguration();
+    int minimumMBAllocation = yarnConf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+    graceMemoryHeadroomBytes = 2 * minimumMBAllocation * FileUtils.ONE_MB;
+  }
+
+  @Override
+  public HeuristicResult apply(TonyApplicationData data) {
+    _LOGGER.debug("Applying TaskMemoryHeuristic");
+    Map<String, Map<Integer, TonyTaskData>> taskMap = data.getTaskMap();
+    Configuration conf = data.getConfiguration();
+
+    Set<String> taskTypes = com.linkedin.tony.util.Utils.getAllJobTypes(conf);
+    Severity finalSeverity = Severity.NONE;
+    List<HeuristicResultDetails> details = new ArrayList<>();
+    int severityScore = 0;
+
+    for (String taskType : taskTypes) {
+      int taskInstances = conf.getInt(TonyConfigurationKeys.getInstancesKey(taskType), 0);
+      details.add(new HeuristicResultDetails("Number of " + taskType + " tasks", String.valueOf(taskInstances)));
+      if (taskInstances == 0) {
+        continue;
+      }
+
+      // get per task memory requested
+      String memoryString = conf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY),
+          TonyConfigurationKeys.DEFAULT_MEMORY);
+      String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString);
+      long taskBytesRequested = Long.parseLong(memoryStringMB) * FileUtils.ONE_MB;
+      details.add(new HeuristicResultDetails("Requested memory (MB) per " + taskType + " task",
+          Long.toString(taskBytesRequested / FileUtils.ONE_MB)));
+
+      // get global max memory per task
+      double maxMemoryBytesUsed = TonyUtils.getMaxMetricForTaskTypeAndMetricName(taskMap, taskType,
+          Constants.MAX_MEMORY_BYTES);
+      if (maxMemoryBytesUsed <= 0) {
+        details.add(new HeuristicResultDetails("Max memory (MB) used in any " + taskType + " task", "Unknown"));
+        continue;
+      }
+      details.add(new HeuristicResultDetails("Max memory (MB) used in any " + taskType + " task",
+          Long.toString((long) maxMemoryBytesUsed / FileUtils.ONE_MB)));
+
+      // compare to threshold and update severity
+      if (taskBytesRequested <= defaultContainerMemoryBytes
+          || taskBytesRequested <= maxMemoryBytesUsed + graceMemoryHeadroomBytes) {
+        // If using default container memory or within grace headroom, automatic pass
+        continue;
+      }
+      double maxMemoryRatio = maxMemoryBytesUsed / taskBytesRequested;
+      Severity taskMemorySeverity = Severity.getSeverityDescending(maxMemoryRatio, maxMemoryLimits[0],
+          maxMemoryLimits[1], maxMemoryLimits[2], maxMemoryLimits[3]);
+      severityScore += Utils.getHeuristicScore(taskMemorySeverity, taskInstances);
+      finalSeverity = Severity.max(finalSeverity, taskMemorySeverity);
+    }
+
+    return new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), finalSeverity,
+        severityScore, details);
+  }
+
+  @Override
+  public HeuristicConfigurationData getHeuristicConfData() {
+    return _heuristicConfData;
+  }
+}
diff --git a/app/com/linkedin/drelephant/tony/util/TonyUtils.java b/app/com/linkedin/drelephant/tony/util/TonyUtils.java
new file mode 100644
index 000000000..d55fe2a42
--- /dev/null
+++ b/app/com/linkedin/drelephant/tony/util/TonyUtils.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.linkedin.drelephant.tony.util;
+
+import com.linkedin.drelephant.tony.data.TonyTaskData;
+import com.linkedin.tony.events.Metric;
+import java.util.List;
+import java.util.Map;
+
+
+public class TonyUtils {
+  /**
+   * Returns the max metric value of any task of the specified type for the given metricName.
+   * Skips metrics collection for unavailable metric data.
+   * @param taskMap a map containing data for all tasks
+   * @param taskType the task type
+   * @param metricName the name of the metric to query
+   * @return the max metric value of any task of the specified type
+   */
+  public static double getMaxMetricForTaskTypeAndMetricName(Map<String, Map<Integer, TonyTaskData>> taskMap,
+      String taskType, String metricName) {
+    double maxMetric = 0.0;
+    if (taskMap.get(taskType) == null) {
+      return -1.0d;
+    }
+
+    for (TonyTaskData taskData : taskMap.get(taskType).values()) {
+      List<Metric> metrics = taskData.getMetrics();
+      if (metrics == null) {
+        continue;
+      }
+      for (Metric metric : metrics) {
+        if (metric.getName().equals(metricName)) {
+          if (metric.getValue() <= 0) {
+            continue;
+          }
+
+          if (metric.getValue() > maxMetric) {
+            maxMetric = metric.getValue();
+          }
+        }
+      }
+    }
+
+    return maxMetric;
+  }
+
+  /**
+   * Returns the average metric value across all tasks of the specified type for the given metricName.
+   * Skips metrics collection for unavailable metric data.
+   * @param taskMap a map containing data for all tasks
+   * @param taskType the task type
+   * @param metricName the name of the metric to query
+   * @return the average metric value across all tasks of the specified type
+   */
+  public static double getAvgMetricForTaskTypeAndMetricName(Map<String, Map<Integer, TonyTaskData>> taskMap,
+      String taskType, String metricName) {
+    double avgMetric = 0.0;
+    double numMetrics = 0;
+    if (taskMap.get(taskType) == null) {
+      return -1.0d;
+    }
+    for (TonyTaskData taskData : taskMap.get(taskType).values()) {
+      List<Metric> metrics = taskData.getMetrics();
+      if (metrics == null) {
+        continue;
+      }
+      for (Metric metric : metrics) {
+        if (metric.getName().equals(metricName)) {
+          if (metric.getValue() <= 0) {
+            continue;
+          }
+
+          avgMetric += metric.getValue();
+          numMetrics++;
+        }
+      }
+    }
+
+    return avgMetric / numMetrics;
+  }
+}
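Note the contract callers must handle: both helpers return -1.0 for an unknown task type, and the average helper divides by zero when no samples match, yielding NaN. That is why the heuristics above guard with `metric <= 0 || Double.isNaN(metric)`. A self-contained sketch of that guard:

```java
public class MetricGuard {
  public static void main(String[] args) {
    double missingAvg = 0.0 / 0; // what the average helper yields with zero samples (NaN)
    double missingType = -1.0d;  // what both helpers return for an unknown task type

    for (double metric : new double[] {missingAvg, missingType, 72.5}) {
      if (metric <= 0 || Double.isNaN(metric)) {
        System.out.println("Unknown");
      } else {
        System.out.println(String.format("%.2f", metric) + "%");
      }
    }
  }
}
```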
diff --git a/app/com/linkedin/drelephant/util/InfoExtractor.java b/app/com/linkedin/drelephant/util/InfoExtractor.java
index 6ff0f4d1b..74c1a106c 100644
--- a/app/com/linkedin/drelephant/util/InfoExtractor.java
+++ b/app/com/linkedin/drelephant/util/InfoExtractor.java
@@ -20,11 +20,6 @@
 import com.linkedin.drelephant.clients.WorkflowClient;
 import com.linkedin.drelephant.configurations.scheduler.SchedulerConfiguration;
 import com.linkedin.drelephant.configurations.scheduler.SchedulerConfigurationData;
-
-import com.linkedin.drelephant.tez.data.TezApplicationData;
-import com.linkedin.drelephant.clients.WorkflowClient;
-
-import com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData;
 import com.linkedin.drelephant.schedulers.Scheduler;
 import com.linkedin.drelephant.spark.data.SparkApplicationData;
 
@@ -41,7 +36,6 @@
 import models.AppResult;
 
 import scala.Option;
-import scala.Some;
 
 
 /**
@@ -114,14 +108,11 @@ public static Scheduler getSchedulerInstance(String appId, Properties properties
    * @param data The Hadoop application data
    */
  public static void loadInfo(AppResult result, HadoopApplicationData data) {
-    Properties properties = new Properties();
-    if( data instanceof MapReduceApplicationData) {
-      properties = retrieveMapreduceProperties((MapReduceApplicationData) data);
-    } else if ( data instanceof SparkApplicationData) {
+    Properties properties;
+    if (data instanceof SparkApplicationData) {
       properties = retrieveSparkProperties((SparkApplicationData) data);
-    }
-    else if(data instanceof TezApplicationData){
-      properties = retrieveTezProperties((TezApplicationData) data);
+    } else {
+      properties = data.getConf();
     }
 
     Scheduler scheduler = getSchedulerInstance(data.getAppId(), properties);
@@ -164,19 +155,6 @@ public static Properties retrieveSparkProperties(SparkApplicationData appData) {
     return properties;
   }
 
-  /**
-   * Retrieve the mapreduce application properties
-   * @param appData the mapReduce Application Data
-   * @return the retrieved mapreduce properties
-   */
-  public static Properties retrieveMapreduceProperties(MapReduceApplicationData appData) {
-    return appData.getConf();
-  }
-
-  public static Properties retrieveTezProperties(TezApplicationData appData) {
-    return appData.getConf();
-  }
-
   /**
    * Populates the given app result with the info from the given application data and scheduler.
    *
diff --git a/app/com/linkedin/drelephant/util/MemoryFormatUtils.java b/app/com/linkedin/drelephant/util/MemoryFormatUtils.java
index b32f61fb9..6a89dc31c 100644
--- a/app/com/linkedin/drelephant/util/MemoryFormatUtils.java
+++ b/app/com/linkedin/drelephant/util/MemoryFormatUtils.java
@@ -94,7 +94,7 @@ public static long stringToBytes(String formattedString) {
       return 0L;
     }
 
-    Matcher matcher = REGEX_MATCHER.matcher(formattedString);
+    Matcher matcher = REGEX_MATCHER.matcher(formattedString.replace(",",""));
     if (!matcher.matches()) {
       throw new IllegalArgumentException(
           "The formatted string [" + formattedString + "] does not match with the regex /" + REGEX_MATCHER.toString()
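The one-line `MemoryFormatUtils` change lets sizes written with thousands separators, such as "1,024 MB", parse instead of throwing `IllegalArgumentException`. A hedged sketch of the effect (regex simplified from the real `REGEX_MATCHER`):

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Stripping commas before matching makes locale-formatted sizes parseable.
public class CommaStripParse {
  private static final Pattern SIZE =
      Pattern.compile("\\s*([0-9.]+)\\s*([TGMK]?B?)\\s*", Pattern.CASE_INSENSITIVE);

  public static void main(String[] args) {
    String formatted = "1,024 MB";
    Matcher m = SIZE.matcher(formatted.replace(",", ""));
    if (m.matches()) {
      System.out.println(m.group(1) + " " + m.group(2)); // 1024 MB
    }
  }
}
```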
diff --git a/app/controllers/MetricsController.java b/app/controllers/MetricsController.java
index 73b9d64fa..23a87a144 100644
--- a/app/controllers/MetricsController.java
+++ b/app/controllers/MetricsController.java
@@ -120,6 +120,12 @@ public Integer getValue() {
         return _retryQueueSize;
       }
     });
+    _metricRegistry.register(name(className, "secondRetryQueue", "size"), new Gauge<Integer>() {
+      @Override
+      public Integer getValue() {
+        return _secondRetryQueueSize;
+      }
+    });
 
     _metricRegistry.registerAll(new CustomGarbageCollectorMetricSet());
     _metricRegistry.registerAll(new MemoryUsageGaugeSet());
diff --git a/app/models/AppResult.java b/app/models/AppResult.java
index e7c9c7f6c..cca925007 100644
--- a/app/models/AppResult.java
+++ b/app/models/AppResult.java
@@ -20,7 +20,6 @@
 import com.linkedin.drelephant.analysis.Severity;
 import com.linkedin.drelephant.util.Utils;
 
-import java.util.Date;
 import play.db.ebean.Model;
 
 import java.util.List;
diff --git a/app/views/help/tony/helpTaskGPU.scala.html b/app/views/help/tony/helpTaskGPU.scala.html
new file mode 100644
index 000000000..ec997ff93
--- /dev/null
+++ b/app/views/help/tony/helpTaskGPU.scala.html
@@ -0,0 +1,37 @@
+@*
+* Copyright 2019 LinkedIn Corp.
+*
+* Licensed under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License. You may obtain a copy of
+* the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations under
+* the License.
+*@
+<p>
+  This heuristic shows GPU utilization and GPU memory utilization for each task type.
+  Try to optimize your GPU utilization!
+</p>
+<p>
+  GPU_UTILIZATION shows the percent of time over the past sample period during which one or more kernels was executing
+  on the GPU.
+</p>
+<p>
+  GPU_FB_MEMORY_USAGE shows the on-board frame buffer memory usage in percentage. Note that the reported total memory is
+  affected by ECC (error-correcting code) state. If ECC is enabled, the total available memory is decreased by several
+  percent, due to the requisite parity bits. The driver may also reserve a small amount of memory for internal use, even
+  without active work on the GPU.
+</p>
+<p>
+  GPU_MAIN_MEMORY_USAGE, aka BAR1 memory usage, shows the percentage of memory used to map the FB (device memory) so that
+  it can be directly accessed by the CPU.
+</p>
+<p>
+  The above metrics are collected via the nvidia-smi tool installed on the host; for more detailed information please
+  visit the nvidia-smi manual.
+</p>
\ No newline at end of file
diff --git a/app/views/help/tony/helpTaskMemory.scala.html b/app/views/help/tony/helpTaskMemory.scala.html
new file mode 100644
index 000000000..9970f99c3
--- /dev/null
+++ b/app/views/help/tony/helpTaskMemory.scala.html
@@ -0,0 +1,28 @@
+@*
+* Copyright 2019 LinkedIn Corp.
+*
+* Licensed under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License. You may obtain a copy of
+* the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+* License for the specific language governing permissions and limitations under
+* the License.
+*@
+<p>
+  This heuristic shows how much memory you requested for each task type and what the max memory used
+  by any instance of each task type actually was. If the requested memory is within some grace amount
+  (default 2 GB) of the max memory, then severity will be none. Otherwise, if the max memory used is
+  less than some percentage (by default 80, 70, 60, or 50 percent) of the requested memory, then
+  Dr. Elephant will indicate some severity (low, moderate, severe, or critical, respectively).
+</p>
+<p>
+  To reduce the amount of memory requested for a task, you can update tony.X.memory, where
+  X is your task type. For example, to request 4 GB for your worker tasks, you can set
+  tony.worker.memory=4g. For more information on TonY configurations, please visit the
+  TonY Configurations Wiki page.
+</p>
\ No newline at end of file
diff --git a/baseline.conf b/baseline.conf
new file mode 100755
index 000000000..0e95ae787
--- /dev/null
+++ b/baseline.conf
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+#
+# Copyright 2016 LinkedIn Corp.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+
+#
+# Configurations for threshold and baseline for various tools.
+#
+
+# ********** Baseline/threshold numbers for Copy Paste Detector(CPD) *************
+# Threshold for CPD when run for Java
+readonly JAVA_CPD_THRESHOLD=32
+# Threshold for CPD when run for Scala
+readonly SCALA_CPD_THRESHOLD=0
+
+# ******************* Baseline and Threshold numbers for Checkstyle *********************
+# Threshold for Checkstyle errors post which build would fail
+readonly CHECKSTYLE_ERROR_THRESHOLD=1384
+# Baseline for Checkstyle warnings (build won't fail for warnings)
+readonly CHECKSTYLE_WARNING_BASELINE=730
+
+# ******************* Baseline and Threshold numbers for Scalastyle *********************
+# Threshold for Scalastyle errors post which build would fail
+readonly SCALASTYLE_ERROR_THRESHOLD=274
+# Baseline for Scalastyle warnings (build won't fail for warnings)
+readonly SCALASTYLE_WARNING_BASELINE=37
diff --git a/build.sbt b/build.sbt
index fb36f05a2..eb24585b2 100644
--- a/build.sbt
+++ b/build.sbt
@@ -23,7 +23,10 @@ version := "2.1.7"
 
 organization := "com.linkedin.drelephant"
 
-javacOptions in Compile ++= Seq("-source", "1.6", "-target", "1.6")
+// Enable CPD SBT plugin
+lazy val root = (project in file(".")).enablePlugins(CopyPasteDetector)
+
+javacOptions in Compile ++= Seq("-source", "1.8", "-target", "1.8")
 
 libraryDependencies ++= dependencies map { _.excludeAll(exclusionRules: _*) }
 
@@ -37,4 +40,4 @@ playJavaSettings
 
 scalaVersion := "2.10.4"
 
-envVars in Test := Map("PSO_DIR_PATH" -> (baseDirectory.value / "scripts/pso").getAbsolutePath)
\ No newline at end of file
+envVars in Test := Map("PSO_DIR_PATH" -> (baseDirectory.value / "scripts/pso").getAbsolutePath)
+//
+
+//
+// sbt-checkstyle-plugin specific configurations go in this file
+//
+
+// Path and name of checkstyle configuration file
+checkstyleConfigLocation := CheckstyleConfigLocation.File("project/checkstyle-config.xml")
+
+// Generate HTML report in addition to default XML report by applying XSLT transformations
+checkstyleXsltTransformations := {
+  Some(Set(CheckstyleXSLTSettings(baseDirectory(_ / "project/checkstyle-noframes-severity-sorted-modified.xsl").value, target(_ / "checkstyle-report.html").value)))
+}
diff --git a/common.sh b/common.sh
new file mode 100755
index 000000000..1a084404d
--- /dev/null
+++ b/common.sh
@@ -0,0 +1,455 @@
+#!/usr/bin/env bash
+
+#
+# Copyright 2016 LinkedIn Corp.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+#
+
+#
+# This script contains common functions and constants which will be used by both
+# compile.sh and travis.sh while running different tools.
+#
+
+########################################################
+#
+# Global constants
+#
+########################################################
+# Base path for most of the quality tool reports
+readonly REPORTS_BASE_PATH="target/scala-2.10/"
+
+# ******************** Constants for Findbugs *********************
+# Default path for Findbugs report
+readonly FINDBUGS_REPORT_PATH=$REPORTS_BASE_PATH"findbugs/report.xml"
+
+# ************* Constants for Copy Paste Detector (CPD) *************
+# CPD report resides in this path
+readonly CPD_REPORT_BASE_PATH=$REPORTS_BASE_PATH"cpd/"
+# Default path for CPD report
+readonly CPD_REPORT_PATH=$CPD_REPORT_BASE_PATH"cpd.xml"
+
+# ******************* Constants for Checkstyle *********************
+# Path for Checkstyle report
+readonly CHECKSTYLE_REPORT_PATH="target/checkstyle-report.xml"
+
+# ******************* Constants for Scalastyle *********************
+# Path for Scalastyle report
+readonly SCALASTYLE_REPORT_PATH="target/scalastyle-result.xml"
+
+# ************************ Other constants **************************
+# Color coded prefixes for ERROR, WARNING, INFO and SUCCESS messages
+readonly ERROR_COLOR_PREFIX="[\033[0;31mERROR\033[0m]"
+readonly WARNING_COLOR_PREFIX="[\033[0;33mWARNING\033[0m]"
+readonly INFO_COLOR_PREFIX="[\033[0;36mINFO\033[0m]"
+readonly SUCCESS_COLOR_PREFIX="[\033[0;32mSUCCESS\033[0m]"
+
+##########################################################
+# Get CPD report name based on language.
+#
+# Arguments:
+#   arg1: Report location
+#   arg2: Language for which CPD report will be generated
+#         (Java or Scala)
+# Returns:
+#   File name where CPD report will be written to.
+##########################################################
+function getCPDReportName() {
+  echo $1"cpd-"$2".xml"
+}
+
+##########################################################
+# Check if there is a failure due to duplicates in CPD
+# report above the configured threshold for the language.
+#
+# Arguments:
+#   arg1: Language for which CPD is run (Java or Scala)
+#   arg2: Duplicates threshold for the language
+#   arg3: Name of the threshold constant for the language
+#   arg4: CPD report file which contains duplicates
+#   arg5: Flag which indicates whether to dump CPD report
+#         Report will be dumped if value of argument is 1
+# Returns:
+#   0: Success
+#   1: Failure due to threshold
+#   2: Failure due to threshold variables not updated
+##########################################################
+function checkIfCPDFailed() {
+  # Count the duplicates flagged in the CPD report
+  duplicates=`grep "<duplication" $4 | wc -l | xargs`
+  # Dump the duplicates found, if asked to
+  if [ $duplicates -gt 0 -a "$5" = "1" ]; then
+    echo -e "$WARNING_COLOR_PREFIX $duplicates $1 duplicates detected in CPD report $4"
+  fi
+  # Fail if duplicates are above the threshold; flag a stale threshold if below it
+  if [ $duplicates -gt $2 ]; then
+    echo -e "$ERROR_COLOR_PREFIX Build failed as the code change has introduced $1 CPD duplicates. $duplicates found (threshold: $2)"
+    return 1;
+  elif [ $duplicates -lt $2 ]; then
+    handleSettingThresholdVariable "$1 CPD" $duplicates "duplicates" $3
+    return 2;
+  fi
+  echo -e "$SUCCESS_COLOR_PREFIX $1 CPD duplicates are within the threshold..."
+  return 0;
+}
+
+##########################################################
+# This function is called when the build has to be failed
+# because a developer has fixed CPD or Checkstyle or
+# Scalastyle issues but not updated the corresponding
+# threshold variable in baseline.conf.
+#
+# Arguments:
+#   arg1: Report description
+#   arg2: Issue count
+#   arg3: Issue description
+#   arg4: Name of variable to be updated
+# Returns:
+#   None
+##########################################################
+function handleSettingThresholdVariable() {
+  msg="$1 Report has $2 $3"
+  color=$ERROR_COLOR_PREFIX
+  failTheBuildMsg=", hence failing the build.\n\tPlease modify"
+  thresholdOrBaseline="threshold"
+  if [ $3 = "warnings" ]; then
+    color=$WARNING_COLOR_PREFIX
+    failTheBuildMsg=".\n\tYou can modify"
+    thresholdOrBaseline="baseline"
+  fi
+  echo -e "$color $msg and you have fixed some of them as part of this change which is great! But you forgot to update the $thresholdOrBaseline$failTheBuildMsg"\
+    "$4 variable to $2 in baseline.conf to ensure that the new $thresholdOrBaseline takes effect for subsequent builds."
+}
+
+##################################################################
+# Check if there are warnings in the report for the tool whose
+# report is being processed. If warnings exist, dump the top 10
+# issues grouped by issue type. Return an integer indicating
+# whether the number of warnings is 0, above or below the baseline.
+# Also print messages if the baseline variable has to be updated in
+# baseline.conf. The number of warnings is also returned by setting
+# an argument passed to the function.
+#
+# Arguments:
+#   arg1: Indicates the tool whose report will be processed
+#         (Checkstyle or Scalastyle)
+#   arg2: Report location for the tool whose report is to be
+#         processed
+#   arg3: Warnings baseline for the tool whose report will be
+#         processed
+#   arg4: Name of the warning baseline constant for the tool
+#   arg5: Argument which will be set equal to the number of
+#         warnings found in the report.
+# Returns:
+#   0: Success
+#   1: Warnings above baseline
+#   2: Warnings fixed but baseline variable not updated
+################################################################
+function checkStyleToolWarnings() {
+  # Local variable which references arg5.
+  local __numWarnings=$5
+  # Check if there are any warnings in the Checkstyle or Scalastyle report
+  local styleWarnings=`grep 'severity="warning"' $2 | wc -l | xargs`
+  # Effectively sets number of warnings to arg5
+  eval $__numWarnings="'$styleWarnings'"
+  if [ $styleWarnings -gt 0 ]; then
+    dumpTop10StyleIssueTypes $1 $2 "warning" $styleWarnings
+    # Fail only if warnings are over the baseline; flag a stale baseline if below it
+    if [ $styleWarnings -gt $3 ]; then
+      return 1;
+    elif [ $styleWarnings -lt $3 ]; then
+      handleSettingThresholdVariable $1 $styleWarnings "warnings" $4
+      return 2;
+    fi
+  else
+    echo -e "$SUCCESS_COLOR_PREFIX $1 Report has no warnings..."
+  fi
+  return 0;
+}
+
+##################################################################
+# Process checkstyle/scalastyle report after the tool has been run.
+# This method will find how many errors exist.
+# If errors exist and they are above the threshold, fail the build.
+# Fail the build even if errors have been fixed but the threshold
+# variable has not been updated in baseline.conf.
+# Print the top 10 issues at error severity grouped by issue
+# type if errors are equal to the threshold (for informational
+# purposes).
+#
+# Arguments:
+#   arg1: Indicates the tool whose report will be processed
+#         (Checkstyle or Scalastyle)
+#   arg2: Report location for the tool whose report is to be
+#         processed
+#   arg3: Error threshold, above which build would fail, for
+#         the tool whose report will be processed
+#   arg4: Name of the error threshold constant for the tool
+#
+# Returns:
+#   0: Success
+#   1: Failure due to errors above threshold
+#   2: Failure due to errors fixed but threshold variable not
+#      updated.
+##################################################################
+function checkStyleToolErrors() {
+  # Check if there are any errors in the Checkstyle or Scalastyle report and fail the build, if above threshold
+  styleErrors=`grep 'severity="error"' $2 | wc -l | xargs`
+  if [ $styleErrors -gt $3 ]; then
+    echo -e "$ERROR_COLOR_PREFIX Build failed as the code change has introduced $1 ERRORS. $styleErrors found (threshold: $3)"
+    return 1;
+  fi
+
+  # Print top 10 checkstyle/scalastyle error categories if the number of errors is within threshold
+  if [ $styleErrors -gt 0 ]; then
+    if [ $styleErrors -eq $3 ]; then
+      dumpTop10StyleIssueTypes $1 $2 "error" $styleErrors
+      echo -e "$WARNING_COLOR_PREFIX Note: The code change may not have introduced $1 errors as count is within threshold. Not failing"\
+        "the build."
+      return 0;
+    else
+      handleSettingThresholdVariable $1 $styleErrors "errors" $4
+      return 2;
+    fi
+  else
+    if [ $3 -gt 0 ]; then
+      handleSettingThresholdVariable $1 $styleErrors "errors" $4
+      return 2;
+    else
+      echo ""
+      echo -e "$SUCCESS_COLOR_PREFIX $1 Report has no errors..."
+      return 0;
+    fi
+  fi
+}
+
+##########################################################
+# Parse the findbugs report and if any bugs are found,
+# fail the build.
+#
+# Arguments:
+#   None
+# Returns:
+#   None
+##########################################################
+function checkFindbugsReport() {
+  # Check if there are any bugs in the Findbugs report
+  if [ ! -f $FINDBUGS_REPORT_PATH ]; then
+    echo -e "$ERROR_COLOR_PREFIX Findbugs report was not generated, failing the build..."
+    echo ""
+    exit 1;
+  fi
+
+  # Incorrect report. Summary does not exist hence cannot parse the report.
+  summaryLine=`grep -i 'FindBugsSummary' $FINDBUGS_REPORT_PATH`
+  if [ -z "$summaryLine" ]; then
+    echo -e "$ERROR_COLOR_PREFIX Build failed as Findbugs summary could not be found in report..."
+    echo ""
+    exit 1;
+  fi
+
+  # Fetch bugs from the report and if any bugs are found, fail the build.
+  totalBugs=`echo $summaryLine | grep -o 'total_bugs="[0-9]*'`
+  totalBugs=`echo $totalBugs | awk -F'="' '{print $2}'`
+  if [ $totalBugs -gt 0 ]; then
+    echo -e "$ERROR_COLOR_PREFIX Build failed due to "$totalBugs" Findbugs issues..."
+    exit 1;
+  fi
+  echo -e "$INFO_COLOR_PREFIX Findbugs report generated at path $FINDBUGS_REPORT_PATH"
+}
+
+##########################################################
+# Scala CPD reports count even Apache license headers as
+# duplicates. Remove them from the CPD report.
+#
+# Arguments:
+#   arg1: CPD report file to be checked for duplicates
+# Returns:
+#   None
+##########################################################
+function removeLicenseHeaderDuplicates() {
+  mv $1 $1".bak"
+  # For each duplication start tag match, do the following
+  awk '{ p = 1 } /<duplication/ {
+    tag = $0;
+    while ((getline) > 0) {
+      tag = tag ORS $0;
+      # Remove section which contains the License
+      if (/Licensed under the Apache License/) {
+        p = 0;
+      }
+      # Break out of loop if duplication end tag matches
+      if (/<\/duplication>/) {
+        break;
+      }
+    }
+    $0 = tag
+  } p' $1".bak" > $1
+  rm -rf $1".bak"
}
+
+##########################################################
+# Change cpdLanguage setting in cpd.sbt from the passed
+# language in first argument to language in second
+# argument.
+# Note: For consistency across platforms not using sed's
+# -i option and instead redirecting output and moving
+# files.
+#
+# Arguments:
+#   arg1: Language setting changed from
+#   arg2: Language setting changed to
+# Returns:
+#   None
+##########################################################
+function changeCPDLanguageSetting() {
+  sed "s/$1/$2/g" cpd.sbt > cpd.sbt.bak
+  mv cpd.sbt.bak cpd.sbt
+}
+
+############################################################
+# Generate a final scalastyle report by removing duplicate
+# errors and sorting the results within a file by line
+# number.
+#
+# Arguments:
+#   arg1: Report location for the tool being run
+# Returns:
+#   None
+############################################################
+function preProcessScalastyleReport() {
+  # Flag to indicate whether we are processing file tag i.e. we have encountered file begin tag but not the file end tag
+  filetag=0
+  currentLineNum=0
+  currentErrorTag=""
+  count=0
+  while IFS='' read -r line || [[ -n "$line" ]]; do
+    if [[ $line == *"<file "* ]]; then
+      # Copy file begin tag as is
+      echo -e $line >> $1.bak
+      filetag=1
+    elif [[ $line == *"</file>"* ]]; then
+      # On end file tag, sort and find unique lines in tmpResult file (contains errors for a file).
+      # This is done to avoid duplicates
+      sortedResults=`cat tmpResult | sort -n -k 1 | uniq`
+      # Remove the line number prepended in tmpResult used for sorting errors by line number
+      finalResults=`echo "${sortedResults}" | sed 's/^[0-9]* / /g'`
+      # Copy errors for a file in sorted order and after removing duplicates
+      echo "${finalResults}" >> $1.bak
+      rm -rf tmpResult
+      # Copy file end tag as well
+      echo -e $line >> $1.bak
+      filetag=0
+    elif [ $filetag -eq 1 ]; then
+      # We are processing errors inside a file.
+      # Fetch line number from the corresponding attribute and prepend the line with line number.
+      # This is done to ensure sorting of errors within a file by line number and removing duplicates,
+      # if any. Store this result in a tmpResult file
+      lineAttribute=`echo "$line" | sed -n 's/.* line="\([0-9.]*\).*/\1/p'`
+      if [[ $line == *"/>"* ]]; then
+        # Self-closing error tag: copy it along with its line number
+        echo -e $lineAttribute" "$line >> tmpResult
+      else
+        # Error tag spans multiple lines: remember its start until the closing part is seen
+        currentLineNum=$lineAttribute
+        currentErrorTag=$line
+      fi
+    elif [[ $line == *"/>"* ]]; then
+      # Processing error tag. Encountered end of tag.
+      lineWithoutSpaces=`echo $line | sed 's/^[ ]*//g'`
+      echo -e "$currentLineNum $currentErrorTag $lineWithoutSpaces" >> tmpResult
+    else
+      # Not inside file tag. Copy line as is.
+      echo -e $line >> $1.bak
+    fi
+  done < $1
+  # Move the .bak file to the report file
+  mv $1.bak $1
+}
diff --git a/compile.sh b/compile.sh
index 6927db244..7616dcb1b 100755
--- a/compile.sh
+++ b/compile.sh
@@ -16,8 +16,34 @@
 # the License.
 #
 
-function print_usage(){
-  echo "usage: ./compile.sh PATH_TO_CONFIG_FILE(optional)"
+########################################################
+#
+# Global constants
+#
+########################################################
+
+# ******************** Constants for Checkstyle *********************
+# Path for Checkstyle HTML report
+readonly CHECKSTYLE_HTML_REPORT_PATH="target/checkstyle-report.html"
+
+# ******************** Constants for Scalastyle *********************
+# Path for Scalastyle HTML report
+readonly SCALASTYLE_HTML_REPORT_PATH="target/scalastyle-result.html"
+# Path for the XSL file used to generate the Scalastyle HTML report
+readonly SCALASTYLE_XSL_FILE="project/checkstyle-noframes-severity-sorted-modified.xsl"
+# Path for Scalastyle HTML report generation python script
+readonly SCALASTYLE_HTML_REPORT_GEN_SCRIPT="project/scalastyle_xml_to_html.py"
+
+function print_usage() {
+  echo ""
+  echo "Usage: ./compile.sh [config_file_path] [additional_options]"
+  echo "  compile.sh optionally takes a custom configuration file path (denoted as config_file_path above) as its first argument."\
+    "This argument can't be at any other position."
+  echo "  Additional options may be passed in any order, and are as under:"
+  echo -e "\tcoverage: Runs Jacoco code coverage and fails the build as per configured threshold"
+  echo -e "\tfindbugs: Runs Findbugs for Java code"
+  echo -e "\tcpd: Runs Copy Paste Detector (CPD) for Java and Scala code"
+  echo -e "\tstylechecks: Runs Checkstyle for Java and Scalastyle for Scala code"
 }
 
 function play_command() {
@@ -39,53 +65,280 @@ function require_programs() {
   done
 
   if [ ! -z "$missing_programs" ]; then
-    echo "[ERROR] The following programs are required and are missing: $missing_programs"
+    echo -e "$ERROR_COLOR_PREFIX The following programs are required and are missing: $missing_programs"
     exit 1
   else
-    echo "[SUCCESS] Program requirement is fulfilled!"
+    echo -e "$SUCCESS_COLOR_PREFIX Program requirement is fulfilled!"
   fi
 }
 
-require_programs zip unzip
+############################################################
+# Generate CPD report based on language in the report path.
+# For Scala, also remove duplicates generated due to license
+# header as they are false negatives. In the end, fail the
+# build if failures are found.
+# +# Arguments: +# arg1: Language (one of Java or Scala) +# arg2: Duplicates threshold for the language +# arg3: Name of the threshold constant for the language +# Returns: +# None +############################################################ +function processCPDReportByLanguage() { + cpd_result_file=$(getCPDReportName $CPD_REPORT_BASE_PATH $1) + mv $CPD_REPORT_PATH $cpd_result_file + if [ $1 = "Scala" ]; then + removeLicenseHeaderDuplicates $cpd_result_file + fi + echo "CPD report generated at path $cpd_result_file" + checkIfCPDFailed $1 $2 $3 $cpd_result_file "0" + result=$? + if [ $result -gt 0 ]; then + if [ $result -eq 2 ]; then + echo -e $(noteForUpdatingRepo)" and that can lead to CI failure..." + fi + echo "" + exit 1; + fi +} -# Default configurations -HADOOP_VERSION="2.3.0" -SPARK_VERSION="1.4.0" +########################################################## +# Run CPD for Java and Scala one by one. For Scala, first +# change cpdLanguage setting in cpd.sbt to Language.Scala +# and then run CPD. Ensure that specific CPD reports are +# generated for each language in the report folder. +# +# Arguments: +# arg1: Play command OPTS +# Returns: +# None +########################################################## +function runCPD() { + echo -e "$INFO_COLOR_PREFIX Running CPD for Java" + play_command $1 cpd + if [ $? -ne 0 ]; then + echo -e "$ERROR_COLOR_PREFIX CPD for Java failed" + exit 1; + fi + processCPDReportByLanguage "Java" $JAVA_CPD_THRESHOLD "JAVA_CPD_THRESHOLD" + + echo -e "$INFO_COLOR_PREFIX Running CPD for Scala" + changeCPDLanguageSetting "Language.Java" "Language.Scala" + play_command $OPTS cpd + if [ $? -ne 0 ]; then + # Reset language back to Java + changeCPDLanguageSetting "Language.Scala" "Language.Java" + echo -e "$ERROR_COLOR_PREFIX CPD for Scala failed" + exit 1; + fi + processCPDReportByLanguage "Scala" $SCALA_CPD_THRESHOLD "SCALA_CPD_THRESHOLD" + # Reset language back to Java + changeCPDLanguageSetting "Language.Scala" "Language.Java" +} -# User should pass an optional argument which is a path to config file -if [ -z "$1" ]; -then - echo "Using the default configuration" -else - CONF_FILE_PATH=$1 - echo "Using config file: "$CONF_FILE_PATH +########################################################## +# Note for updating repo before updating baseline.conf +# +# Arguments: +# None +# Returns: +# Note for updating repo +########################################################## +function noteForUpdatingRepo { + echo -e "$WARNING_COLOR_PREFIX Note: Make sure your local repo is up to date with the branch you want to merge to, otherwise threshold/baseline "\ + "values to be updated in baseline.conf\n\tmight be different" +} - # User must give a valid file as argument - if [ -f $CONF_FILE_PATH ]; - then - echo "Reading from config file..." - else - echo "error: Couldn't find a valid config file at: " $CONF_FILE_PATH - print_usage - exit 1 +############################################################ +# Process style report based on tool for which report is +# being processed. Verifies report existence, checks for +# warning baseline, checks for error threshold breach and +# if required fail the build or print appropriate message. 
+# +# Arguments: +# arg1: Indicates the tool whose report will be processed +# (Checkstyle or Scalastyle) +# arg2: Report location for the tool whose report is to be +# processed +# arg3: Error threshold, above which build would fail, for +# the tool whose report will be processed +# arg4: Warnings baseline for the tool whose report will be +# processed +# arg5: Name of the error threshold constant for the tool +# and language +# arg6: Name of the warning baseline constant for the tool +# and language +# Returns: +# None +############################################################ +function processStyleReport() { + verifyStyleReportExistence $1 $2 + + # Check warnings in Checkstyle/Scalastyle report + checkStyleToolWarnings $1 $2 $4 $6 numWarnings + result=$? + if [ $result -gt 0 ]; then + if [ $result -eq 1 ]; then + msgToResetStyleReportWarning $1 $4 $6 $numWarnings + fi + echo -e $(noteForUpdatingRepo)"..." fi + echo "" + + # Check errors in Checkstyle/Scalastyle report + checkStyleToolErrors $1 $2 $3 $5 + result=$? + if [ $result -gt 0 ]; then + if [ $result -eq 2 ]; then + echo -e $(noteForUpdatingRepo)" and that can lead to CI failure..." + fi + echo "" + exit 1; + fi + echo "" +} - source $CONF_FILE_PATH - - # Fetch the Hadoop version - if [ -n "${hadoop_version}" ]; then - HADOOP_VERSION=${hadoop_version} +############################################################ +# Process both Checkstyle and Scalastyle XML reports. Also +# generates Scalastyle HTML report(Checkstyle HTML report is +# automatically generated by checkstyle4sbt plugin). +# Fail the build if threshold values are breached. +# +# Arguments: +# None +# Returns: +# None +############################################################ +function processCheckstyleAndScalastyleReports() { + echo "" + echo -e "$INFO_COLOR_PREFIX Checking Checkstyle report..." + echo -e "$INFO_COLOR_PREFIX Checkstyle XML report generated at path: $CHECKSTYLE_REPORT_PATH and HTML report generated at path: $CHECKSTYLE_HTML_REPORT_PATH" + processStyleReport "Checkstyle" $CHECKSTYLE_REPORT_PATH $CHECKSTYLE_ERROR_THRESHOLD $CHECKSTYLE_WARNING_BASELINE "CHECKSTYLE_ERROR_THRESHOLD" "CHECKSTYLE_WARNING_BASELINE" + + scalastyleHtmlGenMsg="" + preProcessScalastyleReport $SCALASTYLE_REPORT_PATH + pythonVersion=`python --version 2>&1` + if [ $? -ne 0 ]; then + echo -e "$WARNING_COLOR_PREFIX Cannot generate Scalastyle HTML report as Python is unavailable. Install Python and add it in PATH" + else + # Generate Scalastyle HTML Report + rm -rf $SCALASTYLE_HTML_REPORT_PATH + echo "Using $pythonVersion" + pip install lxml + if [ $? -ne 0 ]; then + echo -e "$WARNING_COLOR_PREFIX Could not install lxml module for Python. Scalastyle HTML report could not be generated" + else + python $SCALASTYLE_HTML_REPORT_GEN_SCRIPT $SCALASTYLE_REPORT_PATH $SCALASTYLE_XSL_FILE $SCALASTYLE_HTML_REPORT_PATH + if [ $? -ne 0 ]; then + echo -e "$WARNING_COLOR_PREFIX Scalastyle HTML report could not be generated" + else + scalastyleHtmlGenMsg=" and HTML report generated at path: $SCALASTYLE_HTML_REPORT_PATH" + fi + fi fi + echo -e "$INFO_COLOR_PREFIX Checking Scalastyle report..." 
+ echo -e "$INFO_COLOR_PREFIX Scalastyle XML report generated at path: $SCALASTYLE_REPORT_PATH"$scalastyleHtmlGenMsg + processStyleReport "Scalastyle" $SCALASTYLE_REPORT_PATH $SCALASTYLE_ERROR_THRESHOLD $SCALASTYLE_WARNING_BASELINE "SCALASTYLE_ERROR_THRESHOLD" "SCALASTYLE_WARNING_BASELINE" +} - # Fetch the Spark version - if [ -n "${spark_version}" ]; then - SPARK_VERSION=${spark_version} +############################################################# +# Run Checkstyle and Scalastyle and then process the report. +# Fail the build if the command fails or if threshold values +# are breached. +# +# Arguments: +# arg1: Play command OPTS +# Returns: +# None +############################################################# +function runStyleChecks() { + echo -e "$INFO_COLOR_PREFIX Running Checkstyle and Scalastyle" + play_command $1 checkstyle scalastyle + if [ $? -ne 0 ]; then + echo -e "$ERROR_COLOR_PREFIX Either Checkstyle or Scalastyle has failed" + echo "" + exit 1; fi + processCheckstyleAndScalastyleReports +} + +require_programs zip unzip + +# Default configurations +HADOOP_VERSION="2.3.0" +SPARK_VERSION="1.4.0" - # Fetch other play opts - if [ -n "${play_opts}" ]; then - PLAY_OPTS=${play_opts} + +extra_commands="" +# Indicates whether a custom configuration file is passed as first parameter. +custom_config="n" +run_CPD="n" +run_StyleChecks="n" +# Process command line arguments +while :; do + if [ ! -z $1 ]; then + case $1 in + coverage) + extra_commands=$extra_commands" jacoco:cover" + ;; + findbugs) + extra_commands=$extra_commands" findbugs" + ;; + cpd) + run_CPD="y" + ;; + stylechecks) + run_StyleChecks="y" + ;; + help) + print_usage + exit 0; + ;; + *) + # User may pass the first argument(optional) which is a path to config file + if [[ -z $extra_commands && $custom_config = "n" ]]; then + CONF_FILE_PATH=$1 + + # User must give a valid file as argument + if [ -f $CONF_FILE_PATH ]; then + echo "Using config file: "$CONF_FILE_PATH + else + echo "error: Couldn't find a valid config file at: " $CONF_FILE_PATH + print_usage + exit 1 + fi + + custom_config="y" + source $CONF_FILE_PATH + + # Fetch the Hadoop version + if [ -n "${hadoop_version}" ]; then + HADOOP_VERSION=${hadoop_version} + fi + + # Fetch the Spark version + if [ -n "${spark_version}" ]; then + SPARK_VERSION=${spark_version} + fi + + # Fetch other play opts + if [ -n "${play_opts}" ]; then + PLAY_OPTS=${play_opts} + fi + else + echo "Invalid option: $1" + print_usage + exit 1; + fi + esac + shift + else + break fi +done +if [ $custom_config = "n" ]; then + echo "Using the default configuration" fi echo "Hadoop Version : $HADOOP_VERSION" @@ -137,22 +390,54 @@ else fi trap "exit" SIGINT SIGTERM +set +x +set +v start_script=${project_root}/scripts/start.sh stop_script=${project_root}/scripts/stop.sh app_conf=${project_root}/app-conf pso_dir=${project_root}/scripts/pso +# Import baseline/threshold numbers used across compile.sh and travis.sh +source baseline.conf +# Import common functions used across compile.sh and travis.sh +source common.sh + +# Run the main command alongwith the extra commands passed as arguments to compile.sh +echo "Command is: play $OPTS clean compile test $extra_commands" +play_command $OPTS clean compile test $extra_commands +if [ $? -ne 0 ]; then + echo "Build failed..." 
+ exit 1; +fi + +if [[ $extra_commands == *"findbugs"* ]]; then + # Parse and check findbugs report + checkFindbugsReport +fi + +# Run CPD if passed as an argument +if [ $run_CPD = "y" ]; then + runCPD $OPTS +fi + +# Run Checkstyle and Scalastyle if stylechecks is passed as an argument +if [ $run_StyleChecks = "y" ]; then + runStyleChecks $OPTS +fi + +set -v +set -ex # Echo the value of pwd in the script so that it is clear what is being removed. rm -rf ${project_root}/dist mkdir dist - -play_command $OPTS clean test compile jacoco:cover dist +# Run distribution +play_command $OPTS dist cd target/universal -ZIP_NAME=`/bin/ls *.zip` -unzip ${ZIP_NAME} +ZIP_NAME=`ls *.zip` +unzip -o ${ZIP_NAME} rm ${ZIP_NAME} DIST_NAME=${ZIP_NAME%.zip} @@ -167,7 +452,7 @@ cp $stop_script ${DIST_NAME}/bin/ cp -r $app_conf ${DIST_NAME} -mkdir ${DIST_NAME}/scripts/ +mkdir -p ${DIST_NAME}/scripts/ cp -r $pso_dir ${DIST_NAME}/scripts/ diff --git a/cpd.sbt b/cpd.sbt new file mode 100644 index 000000000..09e2ca855 --- /dev/null +++ b/cpd.sbt @@ -0,0 +1,28 @@ +// +// Copyright 2016 LinkedIn Corp. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. +// + +// +// cpd4sbt plugin settings for integrating with CPD which is used for code duplication +// +import de.johoop.cpd4sbt._ + +// By default language will be Java but this will be changed to run for Scala as well +// while running build through Travis CI. +cpdLanguage := Language.Java + +// Take distinct source directories to ensure whole file is not reported as duplicate +// of itself. 
+cpdSourceDirectories in Compile := (cpdSourceDirectories in Compile).value.distinct
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 533727a6e..d1fbc6e79 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -23,13 +23,14 @@ object Dependencies {
   lazy val commonsCodecVersion = "1.10"
   lazy val commonsIoVersion = "2.4"
   lazy val gsonVersion = "2.2.4"
-  lazy val guavaVersion = "18.0" // Hadoop defaultly are using guava 11.0, might raise NoSuchMethodException
+  lazy val guavaVersion = "18.0" // Hadoop by default uses Guava 11.0, might raise NoSuchMethodException
   lazy val jacksonMapperAslVersion = "1.7.3"
   lazy val jacksonVersion = "2.5.3"
   lazy val jerseyVersion = "2.24"
   lazy val jsoupVersion = "1.7.3"
   lazy val mysqlConnectorVersion = "5.1.36"
   lazy val oozieClientVersion = "4.2.0"
+  lazy val tonyVersion = "0.3.16"
 
   lazy val HADOOP_VERSION = "hadoopversion"
   lazy val SPARK_VERSION = "sparkversion"
@@ -95,8 +96,11 @@ object Dependencies {
     "org.apache.httpcomponents" % "httpclient" % "4.5.2",
     "org.apache.httpcomponents" % "httpcore" % "4.4.4",
     "org.scalatest" %% "scalatest" % "3.0.0" % Test,
-    "com.h2database" % "h2" % "1.4.196" % Test
-
+    "com.h2database" % "h2" % "1.4.196" % Test,
+    "com.linkedin.tony" % "tony-core" % tonyVersion excludeAll(
+      ExclusionRule(organization = "com.fasterxml.jackson.core"),
+      ExclusionRule(organization = "org.apache.hadoop")
+    )
   ) :+ sparkExclusion
 
   var dependencies = Seq(javaJdbc, javaEbean, cache)
diff --git a/project/build.properties b/project/build.properties
index bb96499e0..2b4fb2038 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -14,4 +14,4 @@
 # the License.
 #
 
-sbt.version=0.13.2
\ No newline at end of file
+sbt.version=0.13.9
diff --git a/project/checkstyle-config.xml b/project/checkstyle-config.xml
new file mode 100644
index 000000000..187628b01
--- /dev/null
+++ b/project/checkstyle-config.xml
@@ -0,0 +1,364 @@
+[The 364-line Checkstyle ruleset was stripped of its XML markup in this extraction and is not recoverable.]
diff --git a/project/checkstyle-java.header b/project/checkstyle-java.header
new file mode 100644
index 000000000..659cce4e6
--- /dev/null
+++ b/project/checkstyle-java.header
@@ -0,0 +1,15 @@
+^/\*$
+^ \* Copyright \d{4} LinkedIn Corp.$
+^ \*$
+^ \* Licensed under the Apache License, Version 2.0 \(the "License"\); you may not$
+^ \* use this file except in compliance with the License. You may obtain a copy of$
+^ \* the License at$
+^ \*$
+^ \*    http://www.apache.org/licenses/LICENSE-2.0$
+^ \*$
+^ \* Unless required by applicable law or agreed to in writing, software$
+^ \* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT$
+^ \* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the$
+^ \* License for the specific language governing permissions and limitations under$
+^ \* the License.$
+^ \*/$
diff --git a/project/checkstyle-noframes-severity-sorted-modified.xsl b/project/checkstyle-noframes-severity-sorted-modified.xsl
new file mode 100644
index 000000000..b16a56cea
--- /dev/null
+++ b/project/checkstyle-noframes-severity-sorted-modified.xsl
@@ -0,0 +1,269 @@
+[The 269-line XSLT stylesheet body was stripped of its markup in this extraction and is not recoverable. Recoverable structure: it renders the Checkstyle or Scalastyle XML report as an HTML page titled "Scalastyle Report" or "Checkstyle Report" (switched on a reporttype parameter), with a "Files" table (Name/Errors/Warnings/Infos), per-file "File" sections listing Severity/Error Description/Line/Column rows with "Back to top" links, a totals "Summary" table, severity-sorted rows, and a "Designed for use with ScalaStyle/CheckStyle" footer.]
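As a usage sketch (paths taken from the constants this patch adds to compile.sh; assumes a Python interpreter with the lxml module installed, which compile.sh obtains via pip), the stylesheet above is applied to a Scalastyle XML report by the helper script added later in this patch:

    python project/scalastyle_xml_to_html.py target/scalastyle-result.xml \
        project/checkstyle-noframes-severity-sorted-modified.xsl target/scalastyle-result.html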
diff --git a/project/checkstyle-suppressions.xml b/project/checkstyle-suppressions.xml new file mode 100644 index 000000000..e8718cc6b --- /dev/null +++ b/project/checkstyle-suppressions.xml @@ -0,0 +1,24 @@ + + + + + + + diff --git a/project/plugins.sbt b/project/plugins.sbt index cb7a66122..9536ee62a 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -17,7 +17,7 @@ logLevel := Level.Warn // The Typesafe repository -resolvers += "Typesafe repository" at "http://repo.typesafe.com/typesafe/releases/" +resolvers += "Typesafe repository" at "https://repo.typesafe.com/typesafe/releases/" // Use the Play sbt plugin for Play projects addSbtPlugin("com.typesafe.play" % "sbt-plugin" % Option(System.getProperty("play.version")).getOrElse("2.2.2")) @@ -27,3 +27,12 @@ addSbtPlugin("de.johoop" % "jacoco4sbt" % "2.1.6") // Findbugs plugin addSbtPlugin("de.johoop" % "findbugs4sbt" % "1.4.0") + +// Copy paste detector plugin +addSbtPlugin("de.johoop" % "cpd4sbt" % "1.2.0") + +// Checkstyle plugin +addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") + +// Scalastyle plugin +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/project/scalastyle-config.xml b/project/scalastyle-config.xml new file mode 100644 index 000000000..043a30f06 --- /dev/null +++ b/project/scalastyle-config.xml @@ -0,0 +1,343 @@ + + + + Scalastyle standard configuration + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mutable\.SynchronizedBuffer + + + + + + mutable\.SynchronizedMap + + + + + + mutable\.SynchronizedSet + + + + + + mutable\.SynchronizedQueue + + + + + + mutable\.SynchronizedPriorityQueue + + + + + + mutable\.SynchronizedStack + + + + + ^println$ + + + + JavaConversions + Instead of importing implicits in scala.collection.JavaConversions._, import + scala.collection.JavaConverters._ and use .asScala / .asJava methods + + + + throw new \w+Error\( + + + + + + + + + + + + + + + 2 + 2 + 2 + + + + + + + + + + + + + ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + + + + PatDefOrDcl,TypeDefOrDcl,FunDefOrDcl,TmplDef + false + javadoc + + + + + + + + + + + + + + + + + diff --git a/project/scalastyle_xml_to_html.py b/project/scalastyle_xml_to_html.py new file mode 100755 index 000000000..da1c6965c --- /dev/null +++ b/project/scalastyle_xml_to_html.py @@ -0,0 +1,70 @@ +# +# Copyright 2016 LinkedIn Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# + +# +# This script is meant to convert Scalastyle XML report to Scalastyle HTML report using +# XSLT transformations. The XSL file is typically the same as the one used to convert +# Checkstyle XML report to HTML report. 
+#
+
+import lxml.etree as ET
+import os
+import sys
+import traceback
+import os.path
+from os import path
+
+# Takes 3 arguments: Scalastyle XML report, XSL file name and HTML report name to be generated
+if len(sys.argv) < 4:
+  print 'Too few arguments, please specify arguments as under:\n 1st argument: Scalastyle XML report file name \n 2nd', \
+      'argument: XSL file name\n 3rd argument: Scalastyle HTML report name to be generated...'
+  sys.exit(1)
+
+print 'Generating Scalastyle HTML report'
+
+# Check if input Scalastyle XML report exists
+xmlReportFileName = sys.argv[1]
+if not path.isfile(xmlReportFileName):
+  print 'Scalastyle XML report {0} not found. Cannot generate HTML report!'.format(xmlReportFileName)
+  sys.exit(1)
+
+# Check if input XSL which will be used to transform XML report exists
+xslFileName = sys.argv[2]
+if not path.isfile(xslFileName):
+  print 'XSL file {0} for Scalastyle XML report conversion not found. Cannot generate HTML report!'.format(xslFileName)
+  sys.exit(1)
+
+# HTML report name which will be outputted
+htmlReportFileName = sys.argv[3]
+
+htmlReportFD = None
+try:
+  xmlreport_root = ET.parse(xmlReportFileName)
+  xslt = ET.parse(xslFileName)
+  transform = ET.XSLT(xslt)
+  # Pass reporttype to XSL to ensure scalastyle specific changes can be made while outputting HTML report.
+  htmlreport_root = transform(xmlreport_root, reporttype="'scalastyle'")
+  htmlstring = ET.tostring(htmlreport_root, pretty_print=True)
+  htmlReportFD = os.open(htmlReportFileName, os.O_RDWR|os.O_CREAT)
+  os.write(htmlReportFD, htmlstring)
+except:
+  print 'Issue encountered during Scalastyle HTML report generation...{0} occurred.'.format(sys.exc_info()[0])
+  desired_trace = traceback.format_exc(sys.exc_info())
+  print(desired_trace)
+  sys.exit(1)
+finally:
+  if htmlReportFD is not None:
+    os.close(htmlReportFD)
diff --git a/scalastyle.sbt b/scalastyle.sbt
new file mode 100644
index 000000000..429766342
--- /dev/null
+++ b/scalastyle.sbt
@@ -0,0 +1,27 @@
+//
+// Copyright 2016 LinkedIn Corp.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+//
+
+//
+// scalastyle-sbt-plugin specific configurations go in this file
+//
+
+// Do not fail on scalastyle errors as we want to baseline error numbers till
+// we fix all errors. We would fail the CI build if any new errors are introduced
+// through a PR.
+scalastyleFailOnError := false
+
+// Scalastyle config file location.
+scalastyleConfig := file("project/scalastyle-config.xml")
diff --git a/scripts/start.sh b/scripts/start.sh
index 6b16bfa10..a9ad0f0d9 100755
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -98,8 +98,8 @@ db_loc="jdbc:mysql://"$db_url"/"$db_name"?characterEncoding=UTF-8"
 # db_password is optional. default is ""
 db_password="${db_password:-""}"
 
-#port is optional. default is 8080
-port="${port:-8080}"
-echo "http port: " $port
+#http port is optional. default is 8080
+http_port="${http_port:-8080}"
+echo "http port: " $http_port
 
 # Check for keytab_user, keytab_location and application_secret in the elephant.conf
@@ -169,7 +169,17 @@ else
 fi
 
 OPTS+=" $jvm_args -Djava.library.path=$JAVA_LIB_PATH"
-OPTS+=" -Dhttp.port=$port"
+OPTS+=" -Dhttp.port=$http_port"
+
+if [ -n "${https_port}" ]; then
+  echo "https port: " ${https_port}
+  echo "https_keystore_location: " ${https_keystore_location}
+  echo "https_keystore_type: " ${https_keystore_type}
+
+  OPTS+=" -Dhttps.port=${https_port} -Dhttps.keyStore=${https_keystore_location}
+  -Dhttps.keyStoreType=${https_keystore_type} -Dhttps.keyStorePassword=${https_keystore_password}"
+fi
+
 OPTS+=" -Ddb.default.url=$db_loc -Ddb.default.user=$db_user -Ddb.default.password=$db_password"
 
 # set Java related options (e.g. -Xms1024m -Xmx1024m)
diff --git a/test/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2Test.java b/test/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2Test.java
new file mode 100644
index 000000000..e0bebbfb2
--- /dev/null
+++ b/test/com/linkedin/drelephant/analysis/AnalyticJobGeneratorHadoop2Test.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.linkedin.drelephant.analysis;
+
+import java.util.ArrayList;
+import java.util.ConcurrentModificationException;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
+
+/**
+ * Unit tests for class {@link AnalyticJobGeneratorHadoop2}.
+ */
+public class AnalyticJobGeneratorHadoop2Test {
+  /**
+   * Tests concurrent operations (fetch and add) on second retry queue.
+   */
+  @Test
+  public void testSecondRetryQueueConcurrentOperations() {
+    final AnalyticJobGeneratorHadoop2 analyticJobGenerator =
+        new AnalyticJobGeneratorHadoop2();
+
+    // Latch to ensure operations on second retry queue from multiple threads
+    // run in parallel
+    final CountDownLatch latch = new CountDownLatch(1);
+
+    // Add a job into second retry queue.
+    AnalyticJob job1 = spy(new AnalyticJob());
+    // Custom answer on call to readyForSecondRetry for this job.
+    doAnswer(new Answer<Boolean>() {
+      @Override
+      public Boolean answer(final InvocationOnMock invocation) throws Throwable {
+        // Wait for additions to second retry queue from add jobs thread to
+        // begin. In case of synchronized access, wait will be for 5 sec.
+        // In case of unsynchronized access, this would lead to
+        // ConcurrentModificationException.
+        latch.await(5000L, TimeUnit.MILLISECONDS);
+        return true;
+      }
+    }).when(job1).readyForSecondRetry();
+    analyticJobGenerator.addIntoSecondRetryQueue(job1);
+
+    // Add a couple of other jobs to second retry queue.
+    AnalyticJob job2 = spy(new AnalyticJob());
+    when(job2.readyForSecondRetry()).thenReturn(false);
+    analyticJobGenerator.addIntoSecondRetryQueue(job2);
+
+    AnalyticJob job3 = spy(new AnalyticJob());
+    when(job3.readyForSecondRetry()).thenReturn(true);
+    analyticJobGenerator.addIntoSecondRetryQueue(job3);
+
+    final List<AnalyticJob> appList = new ArrayList<AnalyticJob>();
+    // Flag to indicate if ConcurrentModificationException has been thrown.
+    final AtomicBoolean cmExceptionFlag = new AtomicBoolean(false);
+    // Start a fetch jobs thread which calls fetchJobsFromSecondRetryQueue
+    // method.
+    Thread fetchJobsThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          analyticJobGenerator.fetchJobsFromSecondRetryQueue(appList);
+        } catch (ConcurrentModificationException e) {
+          cmExceptionFlag.set(true);
+        }
+      }
+    });
+    fetchJobsThread.start();
+
+    // Start an add jobs thread which adds a couple of jobs into second
+    // retry queue while the fetch jobs thread is running in parallel.
+    Thread addJobsThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        AnalyticJob job4 = spy(new AnalyticJob());
+        when(job4.readyForSecondRetry()).thenReturn(false);
+        analyticJobGenerator.addIntoSecondRetryQueue(job4);
+
+        // Latch countdown to ensure fetch jobs thread can continue.
+        latch.countDown();
+
+        AnalyticJob job5 = spy(new AnalyticJob());
+        when(job5.readyForSecondRetry()).thenReturn(true);
+        analyticJobGenerator.addIntoSecondRetryQueue(job5);
+      }
+    });
+    addJobsThread.start();
+
+    // Wait for both the threads to finish.
+    try {
+      fetchJobsThread.join();
+      addJobsThread.join();
+    } catch (InterruptedException e) {
+      // Ignore the exception.
+    }
+
+    // Concurrent operations from multiple threads should not lead to
+    // ConcurrentModificationException as accesses to second retry queue are
+    // synchronized.
+    assertFalse("ConcurrentModificationException should not have been thrown "
+        + "while fetching jobs", cmExceptionFlag.get());
+    // Checking for apps >= 2 as the exact number can be 2 or 3 depending on
+    // order of invocation of threads.
+    assertTrue("Apps fetched from second retry queue should be >= 2.",
+        appList.size() >= 2);
+
+    // Drain the second retry queue by fetching jobs from it.
+ analyticJobGenerator.fetchJobsFromSecondRetryQueue(appList); + assertEquals("Apps fetched from second retry queue should be 3.", 3, + appList.size()); + } +} diff --git a/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala b/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala index 869b9cb67..bd1ba9864 100644 --- a/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala +++ b/test/com/linkedin/drelephant/spark/heuristics/ExecutorGcHeuristicTest.scala @@ -17,11 +17,10 @@ package com.linkedin.drelephant.spark.heuristics import scala.collection.JavaConverters -import com.linkedin.drelephant.analysis.{ApplicationType, Severity, SeverityThresholds} +import com.linkedin.drelephant.analysis.{ApplicationType, Severity} import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData -import com.linkedin.drelephant.spark.data.{SparkApplicationData, SparkLogDerivedData, SparkRestDerivedData} -import com.linkedin.drelephant.spark.fetchers.statusapiv1.{ApplicationInfoImpl, ExecutorSummaryImpl, StageDataImpl} -import org.apache.spark.scheduler.SparkListenerEnvironmentUpdate +import com.linkedin.drelephant.spark.data.{SparkApplicationData, SparkRestDerivedData} +import com.linkedin.drelephant.spark.fetchers.statusapiv1.{ApplicationInfoImpl, ExecutorSummaryImpl} import org.scalatest.{FunSpec, Matchers} import scala.concurrent.duration.Duration @@ -35,7 +34,8 @@ class ExecutorGcHeuristicTest extends FunSpec with Matchers { Map( "max_to_median_ratio_severity_thresholds" -> "1.414,2,4,16", "ignore_max_bytes_less_than_threshold" -> "4000000", - "ignore_max_millis_less_than_threshold" -> "4000001" + "ignore_max_millis_less_than_threshold" -> "4000001", + ExecutorGcHeuristic.GC_SEVERITY_D_THRESHOLDS_KEY -> "0.5,0.3,0.2,0.1" ) ) val executorGcHeuristic = new ExecutorGcHeuristic(heuristicConfigurationData) @@ -89,6 +89,11 @@ class ExecutorGcHeuristicTest extends FunSpec with Matchers { details.getName should include("Total Executor Runtime") details.getValue should be("4740000") } + + it("return Gc ratio low") { + val details = heuristicResultDetails.get(4) + details.getName should include("Gc ratio low") + } } } } diff --git a/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java b/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java new file mode 100644 index 000000000..2e760e8e9 --- /dev/null +++ b/test/com/linkedin/drelephant/tony/TonyMetricsAggregatorTest.java @@ -0,0 +1,146 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.linkedin.drelephant.tony; + +import com.google.common.collect.ImmutableList; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.analysis.HadoopAggregatedData; +import com.linkedin.drelephant.math.Statistics; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import com.linkedin.tony.events.TaskStarted; +import com.linkedin.tony.rpc.impl.TaskStatus; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + +import static com.linkedin.drelephant.tony.TonyMetricsAggregator.MEMORY_BUFFER; + + +public class TonyMetricsAggregatorTest { + @Test + public void testMetricsAggregator() { + Configuration conf = new Configuration(false); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.WORKER_JOB_NAME), 2); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME), 1); + + List events = new ArrayList<>(); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 1, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.PS_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) FileUtils.ONE_GB))), + 10L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 1, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) 2 * FileUtils.ONE_GB))), + 20L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.PS_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), + ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, (double) FileUtils.ONE_GB))), + 20L * Statistics.SECOND_IN_MS)); + + long expectedResourcesUsed = 10 * 4 * 1024 + 20 * 4 * 1024 + 20 * 4 * 1024; + long expectedResourcesWasted = 10 * (long) (4 * 1024 - 2 * 1024 * MEMORY_BUFFER) + + 20 * (long) (4 * 1024 - 2 * 1024 * MEMORY_BUFFER) + + 20 * (long) (4 * 1024 - 1 * 1024 * MEMORY_BUFFER); + + ApplicationType appType = new ApplicationType(Constants.APP_TYPE); + TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events); + TonyMetricsAggregator metricsAggregator = new TonyMetricsAggregator(null); + metricsAggregator.aggregate(data); + HadoopAggregatedData result = metricsAggregator.getResult(); + Assert.assertEquals(expectedResourcesUsed, result.getResourceUsed()); + Assert.assertEquals(expectedResourcesWasted, result.getResourceWasted()); + } + + /** + * Verifies that wasted resources is 0 when there are no metrics. 
+ */ + @Test + public void testNullMetrics() { + Configuration conf = new Configuration(false); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.WORKER_JOB_NAME), 2); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME), 1); + + List events = new ArrayList<>(); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 1, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.PS_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), Collections.emptyList()), + 10L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.WORKER_JOB_NAME, 1, TaskStatus.SUCCEEDED.toString(), Collections.emptyList()), + 20L * Statistics.SECOND_IN_MS)); + events.add(new Event(EventType.TASK_FINISHED, + new TaskFinished(Constants.PS_JOB_NAME, 0, TaskStatus.SUCCEEDED.toString(), Collections.emptyList()), + 20L * Statistics.SECOND_IN_MS)); + + long expectedResourcesUsed = 10 * 4 * 1024 + 20 * 4 * 1024 + 20 * 4 * 1024; + long expectedResourcesWasted = 0; + + ApplicationType appType = new ApplicationType(Constants.APP_TYPE); + TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events); + TonyMetricsAggregator metricsAggregator = new TonyMetricsAggregator(null); + metricsAggregator.aggregate(data); + HadoopAggregatedData result = metricsAggregator.getResult(); + Assert.assertEquals(expectedResourcesUsed, result.getResourceUsed()); + Assert.assertEquals(expectedResourcesWasted, result.getResourceWasted()); + } + + /** + * Verifies that used and wasted resources are 0 when there are no task finished or application finished events. 
+ */ + @Test + public void testNoEndEvents() { + Configuration conf = new Configuration(false); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.WORKER_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.WORKER_JOB_NAME), 2); + conf.set(TonyConfigurationKeys.getResourceKey(Constants.PS_JOB_NAME, Constants.MEMORY), "4g"); + conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME), 1); + + List events = new ArrayList<>(); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 0, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.WORKER_JOB_NAME, 1, null),0L)); + events.add(new Event(EventType.TASK_STARTED, new TaskStarted(Constants.PS_JOB_NAME, 0, null),0L)); + + long expectedResourcesUsed = 0; + long expectedResourcesWasted = 0; + + ApplicationType appType = new ApplicationType(Constants.APP_TYPE); + TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events); + TonyMetricsAggregator metricsAggregator = new TonyMetricsAggregator(null); + metricsAggregator.aggregate(data); + HadoopAggregatedData result = metricsAggregator.getResult(); + Assert.assertEquals(expectedResourcesUsed, result.getResourceUsed()); + Assert.assertEquals(expectedResourcesWasted, result.getResourceWasted()); + } +} diff --git a/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java b/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java new file mode 100644 index 000000000..caf495be5 --- /dev/null +++ b/test/com/linkedin/drelephant/tony/fetchers/TonyFetcherTest.java @@ -0,0 +1,148 @@ +/* + * Copyright 2019 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.linkedin.drelephant.tony.fetchers; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Files; +import com.linkedin.drelephant.analysis.AnalyticJob; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.configurations.fetcher.FetcherConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.drelephant.tony.data.TonyTaskData; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Map; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class TonyFetcherTest { + private static final String APPLICATION_ID_1 = "application_123_456"; + private static final String APPLICATION_ID_2 = "application_789_101"; + private static File _intermediateDir; + private static File _finishedDir; + private static String _tonyConfDir; + private static Date _endDate; + + @BeforeClass + public static void setup() throws IOException, ParseException { + setupTestData(); + setupTestTonyConfDir(); + } + + private static void setupTestData() throws IOException, ParseException { + String yearMonthDay = "2019/05/02"; + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + _endDate = sdf.parse(yearMonthDay); + + File tempDir = Files.createTempDir(); + _intermediateDir = new File(tempDir, "intermediate"); + _finishedDir = new File(tempDir, "finished"); + + createAppHistoryDir(new File(_intermediateDir, APPLICATION_ID_1)); + createAppHistoryDir(new File(_finishedDir, yearMonthDay + Path.SEPARATOR + APPLICATION_ID_2)); + } + + private static void createAppHistoryDir(File appDir) throws IOException { + appDir.mkdirs(); + + // write fake config history file + Configuration conf = new Configuration(false); + conf.set("foo", "bar"); + File configFile = new File(appDir, Constants.TONY_FINAL_XML); + conf.writeXml(new FileOutputStream(configFile)); + + // create fake events + Event event0 = new Event(EventType.TASK_FINISHED, new TaskFinished("worker", 0, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 0.0))), System.currentTimeMillis()); + Event event1 = new Event(EventType.TASK_FINISHED, new TaskFinished("worker", 1, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 1.0))), System.currentTimeMillis()); + Event event2 = new Event(EventType.TASK_FINISHED, new TaskFinished("ps", 0, "SUCCEEDED", + ImmutableList.of(new Metric("my_metric", 0.0))), System.currentTimeMillis()); + + // write fake events history file + File eventFile = new File(appDir, + APPLICATION_ID_1 + "-0-" + _endDate.getTime() + "-user1-SUCCEEDED." 
+            + Constants.HISTFILE_SUFFIX);
+    DatumWriter<Event> userDatumWriter = new SpecificDatumWriter<>(Event.class);
+    DataFileWriter<Event> dataFileWriter = new DataFileWriter<>(userDatumWriter);
+    dataFileWriter.create(event0.getSchema(), eventFile);
+    dataFileWriter.append(event0);
+    dataFileWriter.append(event1);
+    dataFileWriter.append(event2);
+    dataFileWriter.close();
+  }
+
+  private static void setupTestTonyConfDir() throws IOException {
+    Configuration testTonyConf = new Configuration(false);
+    testTonyConf.set(TonyConfigurationKeys.TONY_HISTORY_INTERMEDIATE, _intermediateDir.getPath());
+    testTonyConf.set(TonyConfigurationKeys.TONY_HISTORY_FINISHED, _finishedDir.getPath());
+
+    File confDir = Files.createTempDir();
+    _tonyConfDir = confDir.getPath();
+    File tonySiteFile = new File(confDir, Constants.TONY_SITE_CONF);
+    testTonyConf.writeXml(new FileOutputStream(tonySiteFile));
+  }
+
+  @Test
+  public void testFetchDataIntermediateDir() throws Exception {
+    testHelper(APPLICATION_ID_1);
+  }
+
+  @Test
+  public void testFetchDataFinishedDir() throws Exception {
+    testHelper(APPLICATION_ID_2);
+  }
+
+  private static void testHelper(String appId) throws Exception {
+    FetcherConfigurationData configData = new FetcherConfigurationData(null, null,
+        ImmutableMap.of(Constants.TONY_CONF_DIR, _tonyConfDir,
+            TonyConfigurationKeys.TONY_HISTORY_FINISHED_DIR_TIMEZONE,
+            TonyConfigurationKeys.DEFAULT_TONY_HISTORY_FINISHED_DIR_TIMEZONE));
+    TonyFetcher tonyFetcher = new TonyFetcher(configData);
+
+    AnalyticJob job = new AnalyticJob();
+    ApplicationType tonyAppType = new ApplicationType(Constants.APP_TYPE);
+    job.setFinishTime(_endDate.getTime());
+    job.setAppId(appId);
+    job.setAppType(tonyAppType);
+    TonyApplicationData appData = tonyFetcher.fetchData(job);
+
+    Assert.assertEquals(appId, appData.getAppId());
+    Assert.assertEquals(tonyAppType, appData.getApplicationType());
+    Assert.assertEquals("bar", appData.getConf().get("foo"));
+    Map<String, Map<Integer, TonyTaskData>> metricsMap = appData.getTaskMap();
+    Assert.assertEquals(2, metricsMap.size());
+    Assert.assertEquals(2, metricsMap.get("worker").size());
+    Assert.assertEquals(1, metricsMap.get("ps").size());
+  }
+}
\ No newline at end of file
diff --git a/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java b/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java
new file mode 100644
index 000000000..0a82bac96
--- /dev/null
+++ b/test/com/linkedin/drelephant/tony/heuristics/TaskMemoryHeuristicTest.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2019 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ +package com.linkedin.drelephant.tony.heuristics; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.linkedin.drelephant.analysis.ApplicationType; +import com.linkedin.drelephant.analysis.HeuristicResult; +import com.linkedin.drelephant.analysis.Severity; +import com.linkedin.drelephant.configurations.heuristic.HeuristicConfigurationData; +import com.linkedin.drelephant.tony.data.TonyApplicationData; +import com.linkedin.drelephant.util.Utils; +import com.linkedin.tony.Constants; +import com.linkedin.tony.TonyConfigurationKeys; +import com.linkedin.tony.events.Event; +import com.linkedin.tony.events.EventType; +import com.linkedin.tony.events.Metric; +import com.linkedin.tony.events.TaskFinished; +import com.linkedin.tony.rpc.impl.TaskStatus; +import controllers.Application; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + + +public class TaskMemoryHeuristicTest { + + /** + * 10g workers requested, max worker memory < 50% + */ + @Test + public void testCritical() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 1.2e9, + 1.1e9, + 1e9, + 1.3e9 + }, Constants.PS_JOB_NAME, new double[]{0.5e9}), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "10g", Constants.PS_JOB_NAME, "2g"), + Severity.CRITICAL, + Severity.CRITICAL.getValue() * 4 + ); + } + + /** + * 10g workers requested, max worker memory < 70%; 10g ps requested, max ps memory < 60% + */ + @Test + public void testSevere() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 6.5e9, + 6.6e9, + }, Constants.PS_JOB_NAME, new double[]{5.84e9}), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "10g", Constants.PS_JOB_NAME, "10g"), + Severity.SEVERE, + Severity.MODERATE.getValue() * 2 /* workers */ + Severity.SEVERE.getValue() * 1 + ); + } + + /** + * 10g workers requested, max worker memory < 70% + */ + @Test + public void testModerate() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 6.5e9, + 6.6e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "10g"), + Severity.MODERATE, + Severity.MODERATE.getValue() * 2 + ); + } + + /** + * 10g workers requested, max worker memory < 80% + */ + @Test + public void testLow() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 7.56e9, + 7.45e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "10g"), + Severity.LOW, + 0 + ); + } + + /** + * 10g workers requested, max worker memory > 80% + */ + @Test + public void testNone() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 8.5e9, + 8.6e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "10g"), + Severity.NONE, + 0 + ); + } + + /** + * Low memory utilization but default container size, so pass. + */ + @Test + public void testLowUtilizationDefaultContainerSize() { + testHelper( + ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{ + 0.5e9, + 0.6e9, + }), + ImmutableMap.of(Constants.WORKER_JOB_NAME, "2g"), + Severity.NONE, + 0 + ); + } + + /** + * Though memory utilization is about 50%, severity should be none + * because requested memory is within the default 2 GB grace headroom of the + * max used memory. 
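+   *
+   * Editor's sketch of the check the grace-headroom cases pin down; the
+   * names below are illustrative assumptions, not the shipped
+   * TaskMemoryHeuristic fields:
+   *
+   * <pre>
+   *   // severity stays NONE whenever requested - maxUsed <= 2 GB headroom,
+   *   // even if utilization is only about 50%.
+   *   long graceHeadroomBytes = 2L * 1024 * 1024 * 1024;
+   *   if (requestedBytes - maxUsedBytes <= graceHeadroomBytes) {
+   *     return Severity.NONE;
+   *   }
+   * </pre>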
+   */
+  @Test
+  public void testRequestedSizeWithinGraceHeadroomSeverity() {
+    testHelper(
+        ImmutableMap.of(Constants.WORKER_JOB_NAME, new double[]{
+            1.5e9,
+            1.6e9,
+        }),
+        ImmutableMap.of(Constants.WORKER_JOB_NAME, "3g"),
+        Severity.NONE,
+        0
+    );
+  }
+
+  /**
+   * Verifies that no exception is thrown when the task map is empty.
+   */
+  @Test
+  public void testEmptyTaskMap() {
+    ApplicationType appType = new ApplicationType(Constants.APP_TYPE);
+    Configuration conf = new Configuration(false);
+    conf.setInt(TonyConfigurationKeys.getInstancesKey(Constants.PS_JOB_NAME), 0);
+    TonyApplicationData data = new TonyApplicationData("application_123_456",
+        appType, conf, Collections.EMPTY_LIST);
+    new TaskMemoryHeuristic(new HeuristicConfigurationData("ignored",
+        "ignored", "ignored", appType, Collections.EMPTY_MAP)).apply(data);
+  }
+
+  public void testHelper(Map<String, double[]> memUsed, Map<String, String> memRequested, Severity expectedSeverity,
+      int expectedScore) {
+    Configuration conf = new Configuration(false);
+    List<Event> events = new ArrayList<>();
+    for (Map.Entry<String, String> entry : memRequested.entrySet()) {
+      String taskType = entry.getKey();
+      conf.set(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY), entry.getValue());
+      conf.setInt(TonyConfigurationKeys.getInstancesKey(taskType), memUsed.get(taskType).length);
+
+      for (int i = 0; i < memUsed.get(taskType).length; i++) {
+        events.add(new Event(EventType.TASK_FINISHED,
+            new TaskFinished(taskType, i, TaskStatus.SUCCEEDED.toString(),
+                ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, memUsed.get(taskType)[i]))),
+            System.currentTimeMillis()));
+      }
+    }
+
+    ApplicationType appType = new ApplicationType(Constants.APP_TYPE);
+    TonyApplicationData data = new TonyApplicationData("application_123_456", appType, conf, events);
+
+    TaskMemoryHeuristic heuristic = new TaskMemoryHeuristic(new HeuristicConfigurationData("ignored",
+        "ignored", "ignored", appType, Collections.EMPTY_MAP));
+    HeuristicResult result = heuristic.apply(data);
+    Assert.assertEquals(expectedSeverity, result.getSeverity());
+    Assert.assertEquals(expectedScore, result.getScore());
+  }
+}
diff --git a/test/com/linkedin/drelephant/tony/util/TonyUtilsTest.java b/test/com/linkedin/drelephant/tony/util/TonyUtilsTest.java
new file mode 100644
index 000000000..e305e9edd
--- /dev/null
+++ b/test/com/linkedin/drelephant/tony/util/TonyUtilsTest.java
@@ -0,0 +1,122 @@
+package com.linkedin.drelephant.tony.util;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.linkedin.drelephant.tony.data.TonyTaskData;
+import com.linkedin.tony.Constants;
+import com.linkedin.tony.events.Metric;
+import java.util.Map;
+import java.util.TreeMap;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class TonyUtilsTest {
+  /**
+   * Worker 0 is missing metrics, but worker 1 has metrics; we should use worker 1's
+   * max memory metrics.
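+   *
+   * A minimal sketch of the lookup this test exercises (assuming the util
+   * skips tasks that report no metrics and takes the max over the rest):
+   *
+   * <pre>
+   *   double max = TonyUtils.getMaxMetricForTaskTypeAndMetricName(
+   *       ImmutableMap.of("worker", taskDataMap), "worker", Constants.MAX_MEMORY_BYTES);
+   *   // only worker 1 reported MAX_MEMORY_BYTES, so max == 123.0
+   * </pre>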
+   */
+  @Test
+  public void testGetMaxMemorySomeTasksMissingMetrics() {
+    Map<Integer, TonyTaskData> taskDataMap = new TreeMap<>();
+    TonyTaskData worker0Data = new TonyTaskData("worker", 0);
+    TonyTaskData worker1Data = new TonyTaskData("worker", 1);
+    double worker1MaxMemoryBytes = 123d;
+    worker1Data.setMetrics(ImmutableList.of(new Metric(Constants.MAX_MEMORY_BYTES, worker1MaxMemoryBytes)));
+
+    taskDataMap.put(0, worker0Data);
+    taskDataMap.put(1, worker1Data);
+
+    Assert.assertEquals(worker1MaxMemoryBytes,
+        TonyUtils.getMaxMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker",
+            Constants.MAX_MEMORY_BYTES), 0);
+  }
+
+  @Test
+  public void testGetMaxMetricForTaskTypeAndMetricNameMissingTask() {
+    Map<Integer, TonyTaskData> taskDataMap = new TreeMap<>();
+    TonyTaskData worker0Data = new TonyTaskData("worker", 0);
+
+    taskDataMap.put(0, worker0Data);
+
+    Assert.assertEquals(-1.0d,
+        TonyUtils.getMaxMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "ps",
+            Constants.MAX_MEMORY_BYTES), 0);
+  }
+
+  @Test
+  public void testGetMaxMetricForTaskTypeAndMetricName() {
+    Map<Integer, TonyTaskData> taskDataMap = new TreeMap<>();
+    TonyTaskData worker0Data = new TonyTaskData("worker", 0);
+    TonyTaskData worker1Data = new TonyTaskData("worker", 1);
+
+    double worker0MaxGPUUtilization = 20.0d;
+    double worker1MaxGPUUtilization = 21.0d;
+    double worker0MaxGPUFBMemoryUsage = 22.0d;
+    double worker1MaxGPUFBMemoryUsage = 23.0d;
+    double worker0MaxGPUMainMemoryUsage = 2.0d;
+    double worker1MaxGPUMainMemoryUsage = -1.0d;
+
+    worker0Data.setMetrics(ImmutableList.of(
+        new Metric(Constants.MAX_GPU_UTILIZATION, worker0MaxGPUUtilization),
+        new Metric(Constants.MAX_GPU_FB_MEMORY_USAGE, worker0MaxGPUFBMemoryUsage),
+        new Metric(Constants.MAX_GPU_MAIN_MEMORY_USAGE, worker0MaxGPUMainMemoryUsage)
+    ));
+    worker1Data.setMetrics(ImmutableList.of(
+        new Metric(Constants.MAX_GPU_UTILIZATION, worker1MaxGPUUtilization),
+        new Metric(Constants.MAX_GPU_FB_MEMORY_USAGE, worker1MaxGPUFBMemoryUsage),
+        new Metric(Constants.MAX_GPU_MAIN_MEMORY_USAGE, worker1MaxGPUMainMemoryUsage)
+    ));
+
+    taskDataMap.put(0, worker0Data);
+    taskDataMap.put(1, worker1Data);
+
+    Assert.assertEquals(worker1MaxGPUUtilization,
+        TonyUtils.getMaxMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker",
+            Constants.MAX_GPU_UTILIZATION), 0);
+    Assert.assertEquals(worker1MaxGPUFBMemoryUsage,
+        TonyUtils.getMaxMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker",
+            Constants.MAX_GPU_FB_MEMORY_USAGE), 0);
+    Assert.assertEquals(worker0MaxGPUMainMemoryUsage,
+        TonyUtils.getMaxMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker",
+            Constants.MAX_GPU_MAIN_MEMORY_USAGE), 0);
+  }
+
+  @Test
+  public void testGetAvgMetricForTaskTypeAndMetricName() {
+    Map<Integer, TonyTaskData> taskDataMap = new TreeMap<>();
+    TonyTaskData worker0Data = new TonyTaskData("worker", 0);
+    TonyTaskData worker1Data = new TonyTaskData("worker", 1);
+
+    double worker0AvgGPUUtilization = 10.0d;
+    double worker1AvgGPUUtilization = 20.0d;
+    double worker0AvgGPUFBMemoryUsage = 30.0d;
+    double worker1AvgGPUFBMemoryUsage = 0.0d;
+    double worker0AvgGPUMainMemoryUsage = 40.0d;
+    double worker1AvgGPUMainMemoryUsage = -1.0d;
+
+    worker0Data.setMetrics(ImmutableList.of(
+        new Metric(Constants.AVG_GPU_UTILIZATION, worker0AvgGPUUtilization),
+        new Metric(Constants.AVG_GPU_FB_MEMORY_USAGE, worker0AvgGPUFBMemoryUsage),
+        new Metric(Constants.AVG_GPU_MAIN_MEMORY_USAGE, worker0AvgGPUMainMemoryUsage))
+    );
+    worker1Data.setMetrics(ImmutableList.of(
+        new
Metric(Constants.AVG_GPU_UTILIZATION, worker1AvgGPUUtilization), + new Metric(Constants.AVG_GPU_FB_MEMORY_USAGE, worker1AvgGPUFBMemoryUsage), + new Metric(Constants.AVG_GPU_MAIN_MEMORY_USAGE, worker1AvgGPUMainMemoryUsage) + )); + + taskDataMap.put(0, worker0Data); + taskDataMap.put(1, worker1Data); + + Assert.assertEquals(15.0d, + TonyUtils.getAvgMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker", + Constants.AVG_GPU_UTILIZATION), 0); + Assert.assertEquals(30.0d, + TonyUtils.getAvgMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker", + Constants.AVG_GPU_FB_MEMORY_USAGE), 0); + Assert.assertEquals(40.0d, + TonyUtils.getAvgMetricForTaskTypeAndMetricName(ImmutableMap.of("worker", taskDataMap), "worker", + Constants.AVG_GPU_MAIN_MEMORY_USAGE), 0); + } +} diff --git a/test/com/linkedin/drelephant/util/MemoryFormatUtilsTest.java b/test/com/linkedin/drelephant/util/MemoryFormatUtilsTest.java index 0ae064ebc..d7de08a11 100644 --- a/test/com/linkedin/drelephant/util/MemoryFormatUtilsTest.java +++ b/test/com/linkedin/drelephant/util/MemoryFormatUtilsTest.java @@ -50,6 +50,8 @@ public void testScientificStringNumberToBytes() { assertEquals(expectedBytes, MemoryFormatUtils.stringToBytes("0.879e1 MB")); assertEquals(expectedBytes, MemoryFormatUtils.stringToBytes("+0.879e+1 MB")); assertEquals(expectedBytes, MemoryFormatUtils.stringToBytes("87.9e-1 MB")); + assertEquals(expectedBytes, MemoryFormatUtils.stringToBytes("8,790e-3 MB")); + assertEquals(expectedBytes, MemoryFormatUtils.stringToBytes("8,790,000e-6 MB")); } public void testStringToBytes() { diff --git a/test/common/TestConstants.java b/test/common/TestConstants.java index cb58cc1c0..9541c37a5 100644 --- a/test/common/TestConstants.java +++ b/test/common/TestConstants.java @@ -60,6 +60,9 @@ public class TestConstants { public static final String APPLY_EVOLUTIONS_DEFAULT_KEY = "applyEvolutions.default"; public static final String APPLY_EVOLUTIONS_DEFAULT_VALUE = "true"; + // Application configurations + public static final String METRICS_ENABLE_KEY = "metrics"; + // Paths to the rest end-points public static final String REST_APP_RESULT_PATH = "/rest/job"; public static final String REST_JOB_EXEC_RESULT_PATH = "/rest/jobexec"; @@ -73,6 +76,9 @@ public class TestConstants { public static final String REST_USER_RESOURCE_USAGE_PATH = "/rest/resourceusagedatabyuser"; public static final String REST_GET_CURRENT_RUN_PARAMETERS = "/rest/getCurrentRunParameters"; + // Paths to metrics end-points. + public static final String METRICS_ENDPOINT = "/metrics"; + public static final String DEFAULT_ENCODING = "UTF-8"; //Paths to the UI rest end points diff --git a/test/controllers/MetricsControllerTest.java b/test/controllers/MetricsControllerTest.java new file mode 100644 index 000000000..f129ecb67 --- /dev/null +++ b/test/controllers/MetricsControllerTest.java @@ -0,0 +1,101 @@ +/* + * Copyright 2016 LinkedIn Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */
+
+package controllers;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import org.junit.Before;
+import org.junit.Test;
+import play.libs.WS;
+import play.test.FakeApplication;
+
+import static common.TestConstants.*;
+import static play.test.Helpers.fakeApplication;
+import static play.test.Helpers.running;
+import static play.test.Helpers.testServer;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+
+/**
+ * Tests the class {@link MetricsController}.
+ */
+public class MetricsControllerTest {
+
+  private static FakeApplication fakeApp;
+
+  @Before
+  public void setup() {
+    Map<String, String> additionalConfig = new HashMap<String, String>();
+    additionalConfig.put(DB_DEFAULT_DRIVER_KEY, DB_DEFAULT_DRIVER_VALUE);
+    additionalConfig.put(DB_DEFAULT_URL_KEY, DB_DEFAULT_URL_VALUE);
+    additionalConfig.put(EVOLUTION_PLUGIN_KEY, EVOLUTION_PLUGIN_VALUE);
+    additionalConfig.put(APPLY_EVOLUTIONS_DEFAULT_KEY, APPLY_EVOLUTIONS_DEFAULT_VALUE);
+    additionalConfig.put(METRICS_ENABLE_KEY, "true");
+    fakeApp = fakeApplication(additionalConfig);
+  }
+
+  /**
+   * Test fetching metrics for queue sizes from Dr.Elephant's "/metrics" endpoint.
+   */
+  @Test
+  public void testQueueSizeMetrics() {
+    running(testServer(TEST_SERVER_PORT, fakeApp), new Runnable() {
+      public void run() {
+        // Initialize the metrics and set queue sizes for the main job queue, first retry queue and
+        // second retry queue.
+        MetricsController.init();
+        MetricsController.setQueueSize(6);
+        MetricsController.setRetryQueueSize(4);
+        MetricsController.setSecondRetryQueueSize(2);
+
+        // Initiate a request to metrics endpoint and verify the response.
+        JsonNode jsonResponse = getMetricsEndpointResponse();
+        JsonNode metricsNode = getAndVerifyJsonNode(jsonResponse, "metrics");
+        assertMetricsIntValue(
+            metricsNode, "AnalyticJob.jobQueue.size", 6);
+        assertMetricsIntValue(
+            metricsNode, "AnalyticJob.retryQueue.size", 4);
+        assertMetricsIntValue(
+            metricsNode, "AnalyticJob.secondRetryQueue.size", 2);
+      }
+    });
+  }
+
+  private static JsonNode getMetricsEndpointResponse() {
+    WS.Response response = WS.url(BASE_URL + METRICS_ENDPOINT).
+        get().get(RESPONSE_TIMEOUT, TimeUnit.MILLISECONDS);
+    return response.asJson();
+  }
+
+  private static JsonNode getAndVerifyJsonNode(JsonNode rootNode, String nodeName) {
+    JsonNode node = rootNode.path(nodeName);
+    assertNotNull(nodeName + " node should have been returned", node);
+    return node;
+  }
+
+  private static void assertMetricsIntValue(JsonNode metricsNode, String metricName,
+      int expectedValue) {
+    JsonNode metricNameNode = getAndVerifyJsonNode(metricsNode, metricName);
+    JsonNode valueNode = metricNameNode.path("value");
+    assertNotNull("value node inside " + metricName + " node should have "
+        + "been returned", valueNode);
+    assertEquals(expectedValue, valueNode.asInt());
+  }
+}
diff --git a/test/resources/JobTypeConf.xml b/test/resources/JobTypeConf.xml
index 8a4cae3eb..c7d002898 100644
--- a/test/resources/JobTypeConf.xml
+++ b/test/resources/JobTypeConf.xml
@@ -74,4 +74,10 @@
     <conf>mapred.child.java.opts</conf>
   </jobType>
 
+  <jobType>
+    <name>TonY</name>
+    <applicationtype>TONY</applicationtype>
+    <conf>tony.application.name</conf>
+  </jobType>
+
 </jobTypes>
diff --git a/travis.sh b/travis.sh
index 1325ade24..06e826941 100755
--- a/travis.sh
+++ b/travis.sh
@@ -16,19 +16,123 @@
 # the License.
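Editor's note: the travis.sh additions below replace the old Findbugs-only flow with a multi-step pipeline. As a reading aid, the added control flow boils down to the following (all function names are defined in the hunk below; this is a summary, not part of the patch):

    changedFilesList=$(getChangedFiles)   # files touched by the PR's commits
    runCPD "${changedFilesList}"          # copy-paste detection, Java then Scala
    runStylingTool "checkstyle" ...       # fails the build above the error threshold
    runStylingTool "scalastyle" ...       # same gate for Scala
    sbt test jacoco:cover                 # unit tests plus coverage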
 #
+#
+# Script to be used for building on Travis CI
+#
+
 ########################################################
 #
-# Global constants 
+# Global constants
 #
 ########################################################
-# Base path for most of the quality tool reports
-readonly REPORTS_BASE_PATH="target/scala-2.10/"
-# Default path for Findbugs report
-readonly FINDBUGS_REPORT_PATH=$REPORTS_BASE_PATH"findbugs/report.xml"
+# Tab for use in sed command
+readonly TAB=$'\t'
+
+############################################################
+# Get files changed in this PR using git commands.
+#
+# Arguments:
+#   None
+# Returns:
+#   List of files changed in the PR
+############################################################
+function getChangedFiles() {
+  # Get commit hashes which have been added in the PR
+  commitHashes=`git rev-list origin/HEAD..HEAD`
+  # Extract file names changed for each commit hash
+  changedFiles=$(for hash in $commitHashes; do
+    fileNamesForHash=`git show --name-only --oneline $hash | awk '{if (NR > 1) print}'`
+    if [ ! -z "${fileNamesForHash}" ]; then
+      echo "${fileNamesForHash}"
+    fi
+  done)
+  echo "${changedFiles}" | sort | uniq
+}
+
+##########################################################
+# Check if there are duplicates in CPD report above the
+# configured threshold for the language.
+#
+# Arguments:
+#   arg1: CPD report file to be checked for duplicates
+#   arg2: List of files changed in the PR
+# Returns:
+#   None
+##########################################################
+function dumpCPDSummaryForChangedFiles() {
+  reportDump=`cat $1`
+  for changedFile in $2; do
+    fileDuplicateCnt=`echo "${reportDump}" | grep $changedFile | wc -l`
+    if [ $fileDuplicateCnt -gt 0 ]; then
+      echo -e "\tDuplicate info for file $changedFile:"
+      echo -e "\t------------------------------------------------------------------------------------";
+      echo $reportDump | awk -v filename="$changedFile" '{
+        # Iterate over all the duplicates in CPD report
+        numDuplicates = split($0, duplicates, "<duplication ");
+        for (duplicateIdx = 2; duplicateIdx <= numDuplicates; duplicateIdx++) {
+          # Drop the duplicated code fragment itself, keeping file/line info.
+          gsub(/<codefragment>.*<\/codefragment>/, "", duplicates[duplicateIdx]);
+          # Proceed only if filename is found.
+          if (index(duplicates[duplicateIdx], filename) > 0) {
+            # Sanitize the report for processing.
+            sub(/<\/duplication>/, "", duplicates[duplicateIdx])
+            sub(/<\/pmd-cpd>/, "", duplicates[duplicateIdx])
+            gsub(/<file line="/, "", duplicates[duplicateIdx])
+            gsub(/"\/>/, "", duplicates[duplicateIdx])
+            gsub(/" path="/, " : ", duplicates[duplicateIdx])
+            # Print the sanitized duplication entry.
+            printf("\t%s\n", duplicates[duplicateIdx]);
+          }
+        }
+      }'
+    fi
+  done
+}
+
+##########################################################
+# Find Checkstyle/Scalastyle issues at the given severity
+# for the files changed in the PR.
+#
+# Arguments:
+#   arg1: List of files changed in the PR
+#   arg2: Checkstyle/Scalastyle report location
+#   arg3: Severity (warning or error) to filter issues on
+# Returns:
+#   Style issues found, grouped by changed file
+##########################################################
+function getStyleIssuesForChangedFiles() {
+  styleIssuesForChangedFiles=$(for changedFile in $1; do
+    fileIssueCnt=`grep $changedFile $2 | wc -l`
+    if [ $fileIssueCnt -gt 0 ]; then
+      # Extract the report section for the changed file and keep only the
+      # issues at the severity passed to this function.
+      issuesForFile=`sed -n "/<file name=.*${changedFile##*/}/,/<\/file>/p" $2 | grep "severity=\"$3\""`
+      if [ ! -z "${issuesForFile}" ]; then
+        echo -e "\tFailed checks for file $changedFile:"
+        # Put every issue on its own line and comma-separate its attributes
+        # so that the awk below can pick them apart.
+        finalResults=`echo "${issuesForFile}" | sed "s/${TAB}*<error //g;s/\" /\", /g"`
+        echo "${finalResults}" | awk '{
+          # Iterate over all the Checkstyle/Scalastyle issues for the changed file, filtered by severity
+          numIssues = split($0, issues, "\n")
+          for (issueIdx = 1; issueIdx <= numIssues; issueIdx++) {
+            # Convert special encoding in XML file such as &quot; and &apos;
+            gsub(/"/, "", issues[issueIdx]);
+            gsub(/&apos;/, "'"'"'", issues[issueIdx]);
+            gsub(/&quot;/, "\"", issues[issueIdx]);
+            # Next 4 variables hold the attribute values for line, column, message and source attributes respectively.
+            line=""
+            column=""
+            message=""
+            source=""
+
+            # Indicates whether message attribute is being processed.
+            processingMessage = 0;
+
+            # Extract different attributes for each issue by splitting the line by comma
+            # and for each attribute, extract its value. The attributes we are interested
+            # in are line, column, source and message.
+            # 1. Line indicates the line at which checkstyle/scalastyle issue exists
+            # 2. Column indicates the column at which checkstyle/scalastyle issue exists
+            # 3. Message is explanation about the issue.
+            # 4. Source is Checkstyle/Scalastyle check which led to the issue.
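+            # (Editor's note: the split on "," below assumes only the message
+            # attribute can itself contain commas; the processingMessage flag
+            # stitches such messages back together.)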
+            numAttributes = split(issues[issueIdx], attributes, ",");
+            for (attributeIdx = 1; attributeIdx <= numAttributes; attributeIdx++) {
+              lineIdx = index(attributes[attributeIdx], "line=");
+              if (lineIdx > 0) {
+                line = line "" substr(attributes[attributeIdx], lineIdx + 5);
+                processingMessage = 0;
+                continue;
+              }
+              columnIdx = index(attributes[attributeIdx], "column=");
+              if (columnIdx > 0) {
+                column = column "" substr(attributes[attributeIdx], columnIdx + 7);
+                processingMessage = 0;
+                continue;
+              }
+              sourceIdx = index(attributes[attributeIdx], "source=");
+              if (sourceIdx > 0) {
+                source = source "" substr(attributes[attributeIdx], sourceIdx + 7);
+                processingMessage = 0;
+                continue;
+              }
+
+              # Extract message from message attribute. As message can contain commas as well, continue to append to message
+              # till another attribute is encountered.
+              messageIdx = index(attributes[attributeIdx], "message=");
+              if (messageIdx > 0) {
+                message = message "" substr(attributes[attributeIdx], messageIdx + 8);
+                processingMessage = 1;
+              } else if (processingMessage == 1) {
+                message = message "," attributes[attributeIdx];
+              }
+            }
+            # Remove dot from the end of the message.
+            split(message, chars, "");
+            len = length(message);
+            if (chars[len] == ".") {
+              message="" substr(message, 1, len - 1);
+            }
+
+            # Extract last section of source string, separated by dot.
+            numSourceParts = split(source, sourceParts, ".");
+            # Print style information in the desired format
+            printf("\t + %s (%s) at line: %s%s\n", sourceParts[numSourceParts], message, line, ((column == "") ? "." : (" and column: " column ".")));
+          }
+        }'
+      fi
+    fi
+  done
+  )
+  echo "${styleIssuesForChangedFiles}"
+}
+
+############################################################
+# Capitalizes first character of passed string
+#
+# Arguments:
+#   arg1: String whose first character has to be capitalized
+# Returns:
+#   String with first character capitalized
+############################################################
+function capitalizeFirstCharOfStr() {
+  echo $1 | awk '{
+    split($0, chars, "")
+    for (i = 1; i <= length($0); i++) {
+      if (i == 1) {
+        printf("%s", toupper(chars[i]));
+      } else {
+        printf("%s", chars[i]);
+      }
+    }
+  }'
+}
+
+###################################################################
+# Process checkstyle/scalastyle report after the tool has been run
+# This method will find out whether report has been generated and
+# how many errors/warnings exist.
+# If errors exist and they are above threshold, fail the build.
+# Print summary of checkstyle/scalastyle warnings for changed files
+# if checkStyleToolWarnings function returns 1 i.e. if warnings are
+# above baseline.
+# Print summary of checkstyle/scalastyle errors for changed files
+# if checkStyleToolErrors function returns 1 i.e. if errors are
+# above threshold.
+# If warnings exist and above baseline, warn the user and print
+# top 10 issues at warning severity grouped by issue type.
+# Also print top 10 issues at error severity grouped by issue
+# type if errors are equal to threshold (for informational
+# purposes)
+#
+# Arguments:
+#   arg1: Indicates the tool whose report will be processed
+#         (Checkstyle or Scalastyle)
+#   arg2: Report location for the tool whose report is to be
+#         processed
+#   arg3: Error threshold, above which build would fail, for
+#         the tool whose report will be processed
+#   arg4: Warnings baseline for the tool whose report will be
+#         processed
+#   arg5: Name of the error threshold constant for the tool
+#         and language
+#   arg6: Name of the warning baseline constant for the tool
+#         and language
+#   arg7: List of files changed in the PR
+# Returns:
+#   None
+###################################################################
+function processStyleReports() {
+  # Verify if style report exists for the tool whose report is being processed
+  verifyStyleReportExistence $1 $2
+
+  # Check warnings in Checkstyle/Scalastyle report
+  checkStyleToolWarnings $1 $2 $4 $6 numWarnings
+  result=$?
+  if [ $result -gt 0 ]; then
+    if [ $result -eq 1 ]; then
+      # If there are warnings above baseline, find all warnings for changed files
+      styleIssuesForChangedFiles=$(getStyleIssuesForChangedFiles "${7}" $2 "warning")
+      fileCnt=`echo "${styleIssuesForChangedFiles}" | grep "Failed checks for file" | wc -l | xargs`
+      if [ $fileCnt -gt 0 ]; then
+        echo -e "$WARNING_COLOR_PREFIX Note: This PR may have introduced $1 warnings (baseline: $4)"
+        echo -e "$WARNING_COLOR_PREFIX Listing $1 WARNINGS for the files changed in the PR:"
+        echo "${styleIssuesForChangedFiles}"
+      else
+        msgToResetStyleReportWarning $1 $4 $6 $numWarnings
+      fi
+    fi
+  fi
+  echo ""
+
+  # Check errors in Checkstyle/Scalastyle report
+  checkStyleToolErrors $1 $2 $3 $5
+  result=$?
+  if [ $result -gt 0 ]; then
+    if [ $result -eq 1 ]; then
+      echo -e "$ERROR_COLOR_PREFIX Listing $1 ERRORS for the files changed in the PR:"
+      styleIssuesForChangedFiles=$(getStyleIssuesForChangedFiles "${7}" $2 "error")
+      echo "${styleIssuesForChangedFiles}"
+      echo ""
+      echo -e "$ERROR_COLOR_PREFIX $1 step failed..."
+    fi
+    echo ""
     exit 1;
   fi
+  echo ""
+}
 
-  # Incorrect report. Summary does not exist hence cannot parse the report.
-  summaryLine=`grep -i 'FindBugsSummary' $FINDBUGS_REPORT_PATH`
-  if [ -z "$summaryLine" ]; then
-    echo -e "$ERROR_COLOR_PREFIX Build failed as Findbugs summary could not be found in report..."
+#########################################################
+# Run CPD for the language passed, check for failures and
+# move the final result to a separate file.
+#
+# Arguments:
+#   arg1: Language for which CPD is run (Java or Scala)
+#   arg2: Duplicates threshold for the language
+#   arg3: Name of the threshold constant for the language
+#   arg4: List of files changed in the PR
+# Returns:
+#   None
+#########################################################
+function runCPDForLanguage() {
+  sbt cpd
+  if [ $? -ne 0 ]; then
+    echo -e "$ERROR_COLOR_PREFIX CPD for "$1" failed..."
     exit 1;
   fi
+  cpd_result_file=$(getCPDReportName $CPD_REPORT_BASE_PATH $1)
+  mv $CPD_REPORT_PATH $cpd_result_file
+  if [ $1 = "Scala" ]; then
+    removeLicenseHeaderDuplicates $cpd_result_file
+  fi
+  checkCPDReport $1 $2 $3 $cpd_result_file "${4}"
+}
+
+##########################################################
+# Run CPD for Java and Scala one by one. For Scala, first
+# change cpdLanguage setting in cpd.sbt to Language.Scala
+# and then run CPD.
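+#
+# For intuition, the language toggle is a one-line substitution in cpd.sbt.
+# A sketch of what changeCPDLanguageSetting presumably does (the helper is
+# sourced from common.sh, so this is an assumption, not the shipped code):
+#
+#   sed -i "s/$1/$2/g" cpd.sbt   # e.g. "Language.Java" -> "Language.Scala"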
+# +# Arguments: +# arg1: List of files changed in the PR +# Returns: +# None +########################################################## +function runCPD() { + echo -e "$INFO_COLOR_PREFIX Running Copy Paste Detector(CPD) for Java..." + runCPDForLanguage "Java" $JAVA_CPD_THRESHOLD "JAVA_CPD_THRESHOLD" "${1}" + + echo "" + # Change language to Scala before running CPD again. + changeCPDLanguageSetting "Language.Java" "Language.Scala" + echo -e "$INFO_COLOR_PREFIX Running Copy Paste Detector(CPD) for Scala..." + runCPDForLanguage "Scala" $SCALA_CPD_THRESHOLD "SCALA_CPD_THRESHOLD" "${1}" + echo "" +} - # Fetch bugs from the report and if any bugs are found, fail the build. - totalBugs=`echo $summaryLine | grep -o 'total_bugs="[0-9]*'` - totalBugs=`echo $totalBugs | awk -F'="' '{print $2}'` - if [ $totalBugs -gt 0 ];then - echo -e "$ERROR_COLOR_PREFIX Build failed due to "$totalBugs" Findbugs issues..." +############################################################ +# Run sbt checkstyle or scalastyle command, parse the report +# and if errors are found above threshold, fail the build. +# +# Arguments: +# arg1: Command/tool to be run (checkstyle or scalastyle) +# arg2: Report location for the tool being run +# arg3: Error threshold, above which build would fail, for +# the tool being run +# arg4: Warnings baseline for the tool being run +# arg5: Name of the error threshold constant for the tool +# and language +# arg6: Name of the warning baseline constant for the tool +# and language +# arg7: List of files changed in the PR +# Returns: +# None +############################################################ +function runStylingTool() { + sbt $1 + if [ $? -ne 0 ]; then + echo -e "$ERROR_COLOR_PREFIX $1 step failed..." exit 1; fi + if [ $1 = "scalastyle" ]; then + preProcessScalastyleReport $SCALASTYLE_REPORT_PATH + fi + processStyleReports $(capitalizeFirstCharOfStr $1) $2 $3 $4 $5 $6 "${7}" } ######################################################## @@ -75,10 +466,28 @@ function runFindbugs() { # ######################################################## +# Import baseline/threshold numbers used across compile.sh and travis.sh +source baseline.conf +# Import common functions used across compile.sh and travis.sh +source common.sh +readonly changedFilesList=$(getChangedFiles) +echo "" +if [ ! -z "${changedFilesList}" ]; then + echo "***********************************************************" + echo -e "$INFO_COLOR_PREFIX Files changed (added, modified, deleted) in this PR are:" + echo "${changedFilesList}" | awk '{ + numFiles = split($0, files, "\n") + for (fileIdx = 1; fileIdx <= numFiles; fileIdx++) { + printf("\t- %s\n", files[fileIdx]); + } + }' +fi + echo "" echo "************************************************************" echo " 1. Compile Dr.Elephant code" echo "************************************************************" +echo -e "$INFO_COLOR_PREFIX Compiling code..." sbt clean compile if [ $? -ne 0 ]; then echo -e "$ERROR_COLOR_PREFIX Compilation failed..." @@ -95,8 +504,32 @@ echo -e "$SUCCESS_COLOR_PREFIX Findbugs step succeeded..." echo "" echo "************************************************************" -echo " 3. Run unit tests and code coverage" +echo " 3. Run Copy Paste Detector(CPD)" +echo "************************************************************" +runCPD "${changedFilesList}" +echo -e "$SUCCESS_COLOR_PREFIX Copy Paste Detector(CPD) step succeeded..." + +echo "" +echo "************************************************************" +echo " 4. 
Checkstyle for JAVA code" +echo "************************************************************" +echo -e "$INFO_COLOR_PREFIX Running Checkstyle..." +runStylingTool "checkstyle" $CHECKSTYLE_REPORT_PATH $CHECKSTYLE_ERROR_THRESHOLD $CHECKSTYLE_WARNING_BASELINE "CHECKSTYLE_ERROR_THRESHOLD" "CHECKSTYLE_WARNING_BASELINE" "${changedFilesList}" +echo -e "$SUCCESS_COLOR_PREFIX Checkstyle step succeeded..." + +echo "" +echo "************************************************************" +echo " 5. Scalastyle for Scala code" +echo "************************************************************" +echo -e "$INFO_COLOR_PREFIX Running Scalastyle..." +runStylingTool "scalastyle" $SCALASTYLE_REPORT_PATH $SCALASTYLE_ERROR_THRESHOLD $SCALASTYLE_WARNING_BASELINE "SCALASTYLE_ERROR_THRESHOLD" "SCALASTYLE_WARNING_BASELINE" "${changedFilesList}" +echo -e "$SUCCESS_COLOR_PREFIX Scalastyle step succeeded..." + +echo "" +echo "************************************************************" +echo " 6. Run unit tests and code coverage" echo "************************************************************" +echo -e "$INFO_COLOR_PREFIX Running unit tests and code coverage..." sbt test jacoco:cover if [ $? -ne 0 ]; then echo -e "$ERROR_COLOR_PREFIX Unit tests or code coverage failed..." diff --git a/web/package.json b/web/package.json index 1e2eeabf3..da4bd457a 100644 --- a/web/package.json +++ b/web/package.json @@ -19,7 +19,7 @@ "author": "", "license": "MIT", "devDependencies": { - "bower": "1.7.9", + "bower": "1.8.8", "broccoli-asset-rev": "2.4.6", "ember-ajax": "2.5.1", "ember-cli": "2.6.3",
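Editor's closing note: the TonY aggregator test near the top of this patch asserts zero resource usage when only TASK_STARTED events are present, without showing the aggregator itself. Below is a self-contained sketch of the MB-seconds accounting that assertion implies; every name here is illustrative, not dr-elephant or TonY API:

    /** Illustrative MB-seconds accounting; not the shipped TonyMetricsAggregator. */
    final class TonyAccountingSketch {
      /** Memory a task is charged for: container MB times runtime in seconds. */
      static long mbSeconds(long containerMb, long startMs, Long endMs) {
        if (endMs == null) {
          return 0L; // no TASK_FINISHED event => no duration => nothing to charge
        }
        return containerMb * ((endMs - startMs) / 1000L);
      }

      public static void main(String[] args) {
        // Two 4g workers and one 4g ps that started at t=0 but never finished
        // mirror testNoEndEvents: used (and hence wasted) stays 0.
        long used = 3 * mbSeconds(4096, 0L, null);
        System.out.println(used); // prints 0
      }
    }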