Skip to content

Commit

Permalink
Operations on second retry queue for jobs in Dr Elephant are not thre…
Browse files Browse the repository at this point in the history
…ad safe (linkedin#496)

* Operations on second retry queue for jobs in Dr Elephant are not thread safe

* Change second retry queue to LinkedList
  • Loading branch information
varunsaxena authored and mkumar1984 committed Feb 4, 2019
1 parent 5d3f96c commit 17635f6
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.linkedin.drelephant.analysis;

import com.google.common.annotations.VisibleForTesting;
import com.linkedin.drelephant.ElephantContext;
import com.linkedin.drelephant.math.Statistics;
import controllers.MetricsController;
Expand Down Expand Up @@ -65,7 +66,7 @@ public class AnalyticJobGeneratorHadoop2 implements AnalyticJobGenerator {

private final Queue<AnalyticJob> _firstRetryQueue = new ConcurrentLinkedQueue<AnalyticJob>();

private final ArrayList<AnalyticJob> _secondRetryQueue = new ArrayList<AnalyticJob>();
private final List<AnalyticJob> _secondRetryQueue = new LinkedList<AnalyticJob>();

public void updateResourceManagerAddresses() {
if (Boolean.valueOf(configuration.get(IS_RM_HA_ENABLED))) {
Expand Down Expand Up @@ -171,19 +172,28 @@ public List<AnalyticJob> fetchAnalyticJobs()
appList.add(_firstRetryQueue.poll());
}

Iterator iteratorSecondRetry = _secondRetryQueue.iterator();
while (iteratorSecondRetry.hasNext()) {
AnalyticJob job = (AnalyticJob) iteratorSecondRetry.next();
if(job.readyForSecondRetry()) {
appList.add(job);
iteratorSecondRetry.remove();
}
}
// Fetch jobs from second retry queue which are ready for second retry and
// add to app list.
fetchJobsFromSecondRetryQueue(appList);

_lastTime = _currentTime;
return appList;
}

/**
 * Moves every job in the second retry queue that reports
 * {@link AnalyticJob#readyForSecondRetry()} into the supplied list, removing
 * it from the queue. Jobs that are not ready stay queued.
 *
 * The whole scan runs while holding the monitor of the second retry queue.
 *
 * @param appList list to which the ready jobs are appended
 */
@VisibleForTesting
void fetchJobsFromSecondRetryQueue(List<AnalyticJob> appList) {
  synchronized (_secondRetryQueue) {
    // Typed iterator: no raw type, no unchecked cast on next().
    Iterator<AnalyticJob> iteratorSecondRetry = _secondRetryQueue.iterator();
    while (iteratorSecondRetry.hasNext()) {
      AnalyticJob job = iteratorSecondRetry.next();
      if (job.readyForSecondRetry()) {
        appList.add(job);
        // Remove through the iterator so the ongoing traversal stays valid.
        iteratorSecondRetry.remove();
      }
    }
  }
}

@Override
public void addIntoRetries(AnalyticJob promise) {
_firstRetryQueue.add(promise);
Expand All @@ -193,9 +203,12 @@ public void addIntoRetries(AnalyticJob promise) {
}

@Override
public void addIntoSecondRetryQueue(AnalyticJob promise) {
_secondRetryQueue.add(promise.setTimeToSecondRetry());
int secondRetryQueueSize = _secondRetryQueue.size();
public void addIntoSecondRetryQueue(AnalyticJob job) {
  // Append the job and read the resulting size under the same lock, so the
  // reported size corresponds to the state right after this insertion.
  final int queueSizeAfterAdd;
  synchronized (_secondRetryQueue) {
    AnalyticJob scheduledJob = job.setTimeToSecondRetry();
    _secondRetryQueue.add(scheduledJob);
    queueSizeAfterAdd = _secondRetryQueue.size();
  }

  // Metrics and logging happen outside the lock.
  MetricsController.setSecondRetryQueueSize(queueSizeAfterAdd);
  logger.info("Second Retry queue size is " + queueSizeAfterAdd);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Copyright 2016 LinkedIn Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.linkedin.drelephant.analysis;

import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;


/**
* Unit tests for class {@link AnalyticJobGeneratorHadoop2}.
*/
public class AnalyticJobGeneratorHadoop2Test {
  /**
   * Tests concurrent operations (fetch and add) on second retry queue.
   *
   * One thread drains ready jobs from the queue while another thread adds
   * jobs to it; the fetch must not observe a
   * {@link ConcurrentModificationException}.
   *
   * @throws InterruptedException if the test thread is interrupted while
   *         waiting for the worker threads; the test then fails instead of
   *         asserting on the state of threads that may still be running
   */
  @Test
  public void testSecondRetryQueueConcurrentOperations() throws InterruptedException {
    final AnalyticJobGeneratorHadoop2 analyticJobGenerator =
        new AnalyticJobGeneratorHadoop2();

    // Latch to ensure operations on second retry queue from multiple threads
    // run in parallel
    final CountDownLatch latch = new CountDownLatch(1);

    // Add a job into second retry queue.
    AnalyticJob job1 = spy(new AnalyticJob());
    // Custom answer on call to readyForSecondRetry for this job.
    doAnswer(new Answer<Boolean>() {
      @Override
      public Boolean answer(final InvocationOnMock invocation) throws Throwable {
        // Wait (at most 5 sec) for additions to second retry queue from the
        // add jobs thread to begin. With synchronized access the adder may be
        // blocked until this wait times out. With unsynchronized access the
        // concurrent addition would lead to ConcurrentModificationException.
        latch.await(5000L, TimeUnit.MILLISECONDS);
        return true;
      }
    }).when(job1).readyForSecondRetry();
    analyticJobGenerator.addIntoSecondRetryQueue(job1);

    // Add couple of other jobs to second retry queue.
    AnalyticJob job2 = spy(new AnalyticJob());
    when(job2.readyForSecondRetry()).thenReturn(false);
    analyticJobGenerator.addIntoSecondRetryQueue(job2);

    AnalyticJob job3 = spy(new AnalyticJob());
    when(job3.readyForSecondRetry()).thenReturn(true);
    analyticJobGenerator.addIntoSecondRetryQueue(job3);

    final List<AnalyticJob> appList = new ArrayList<AnalyticJob>();
    // Flag to indicate if ConcurrentModificationException has been thrown.
    final AtomicBoolean cmExceptionFlag = new AtomicBoolean(false);
    // Start a fetch jobs thread which calls fetchJobsFromSecondRetryQueue
    // method.
    Thread fetchJobsThread = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          analyticJobGenerator.fetchJobsFromSecondRetryQueue(appList);
        } catch (ConcurrentModificationException e) {
          cmExceptionFlag.set(true);
        }
      }
    });
    fetchJobsThread.start();

    // Start an add jobs thread which adds a couple of jobs into second
    // retry queue while fetch jobs thread is running in parallel.
    Thread addJobsThread = new Thread(new Runnable() {
      @Override
      public void run() {
        AnalyticJob job4 = spy(new AnalyticJob());
        when(job4.readyForSecondRetry()).thenReturn(false);
        analyticJobGenerator.addIntoSecondRetryQueue(job4);

        // Latch countdown to ensure fetch jobs thread can continue.
        latch.countDown();

        AnalyticJob job5 = spy(new AnalyticJob());
        when(job5.readyForSecondRetry()).thenReturn(true);
        analyticJobGenerator.addIntoSecondRetryQueue(job5);
      }
    });
    addJobsThread.start();

    // Wait for both the threads to finish. An interruption is propagated
    // rather than swallowed: the assertions below are only meaningful once
    // both threads have completed.
    fetchJobsThread.join();
    addJobsThread.join();

    // Concurrent operations from multiple threads should not lead to
    // ConcurrentModificationException as accesses to second retry queue are
    // synchronized.
    assertFalse("ConcurrentModificationException should not have been thrown " +
        "while fetching jobs", cmExceptionFlag.get());
    // Checking for apps >= 2 as the exact number can be 2 or 3 depending on
    // order of invocation of threads.
    assertTrue("Apps fetched from second retry queue should be >= 2.",
        appList.size() >= 2);

    // Drain the second retry queue by fetching jobs from it.
    analyticJobGenerator.fetchJobsFromSecondRetryQueue(appList);
    assertEquals("Apps fetched from second retry queue should be 3.", 3,
        appList.size());
  }
}

0 comments on commit 17635f6

Please sign in to comment.