From 3864480e14a4961720cc1be43635c7c7dec08c09 Mon Sep 17 00:00:00 2001 From: sarutak Date: Wed, 29 Aug 2018 07:13:13 -0700 Subject: [PATCH] [SPARK-25266][CORE] Fix memory leak in Barrier Execution Mode ## What changes were proposed in this pull request? BarrierCoordinator uses Timer and TimerTask. `TimerTask#cancel()` is invoked in ContextBarrierState#cancelTimerTask but `Timer#purge()` is never invoked. Once a TimerTask is scheduled, the reference to it is not released until `Timer#purge()` is invoked even though `TimerTask#cancel()` is invoked. ## How was this patch tested? I checked the number of instances related to the TimerTask using jmap. Closes #22258 from sarutak/fix-barrierexec-oom. Authored-by: sarutak Signed-off-by: Xiangrui Meng --- core/src/main/scala/org/apache/spark/BarrierCoordinator.scala | 1 + core/src/main/scala/org/apache/spark/BarrierTaskContext.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala index 5e546c694e8d9..6439ca5db06e9 100644 --- a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala @@ -123,6 +123,7 @@ private[spark] class BarrierCoordinator( private def cancelTimerTask(): Unit = { if (timerTask != null) { timerTask.cancel() + timer.purge() timerTask = null } } diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index de827987f28f9..3901f96326f75 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -140,6 +140,7 @@ class BarrierTaskContext( throw e } finally { timerTask.cancel() + timer.purge() } }