From 0ef7fddeff8430fd40d2d7a1b8a6454fd9416ced Mon Sep 17 00:00:00 2001 From: Till Rohrmann Date: Thu, 14 Dec 2017 15:24:35 +0100 Subject: [PATCH] [FLINK-7956] [flip6] Add support for queued scheduling with slot sharing to SlotPool This commit adds support for queued scheduling with slot sharing to the SlotPool. The idea of slot sharing is that multiple tasks can run in the same slot. Moreover, queued scheduling means that a slot request need not be completed right away but can be completed at a later point in time. This allows new TaskExecutors to be started in case there are no more slots left. The main component responsible for the management of shared slots is the SlotSharingManager. The SlotSharingManager internally maintains a tree-like structure which stores the SlotContext future of the underlying AllocatedSlot. Whenever this future is completed, potentially pending LogicalSlot instantiations are executed and sent to the slot requester. A shared slot is represented by a MultiTaskSlot which can harbour multiple TaskSlots. A TaskSlot can either be a MultiTaskSlot or a SingleTaskSlot. In order to represent co-location constraints, we first obtain a root MultiTaskSlot and then allocate a nested MultiTaskSlot in which the co-located tasks are allocated. The corresponding SlotRequestID is assigned to the CoLocationConstraint in order to make the TaskSlot retrievable for other tasks assigned to the same CoLocationConstraint. Port SchedulerSlotSharingTest, SchedulerIsolatedTasksTest and ScheduleWithCoLocationHintTest to run with SlotPool. Restructure SlotPool components. Add SlotSharingManagerTest, SlotPoolSlotSharingTest and SlotPoolCoLocationTest. This closes #5091. 
--- .../InputChannelDeploymentDescriptor.java | 2 +- .../runtime/executiongraph/Execution.java | 31 +- .../executiongraph/ExecutionGraph.java | 2 +- .../executiongraph/ExecutionGraphBuilder.java | 2 +- .../executiongraph/ExecutionJobVertex.java | 2 +- .../executiongraph/ExecutionVertex.java | 4 +- .../flink/runtime/instance/Instance.java | 10 +- .../flink/runtime/instance/SharedSlot.java | 40 +- .../flink/runtime/instance/SimpleSlot.java | 31 +- .../slots => instance}/SimpleSlotContext.java | 14 +- .../apache/flink/runtime/instance/Slot.java | 13 +- .../instance/SlotSharingGroupAssignment.java | 45 +- ...RequestID.java => SlotSharingGroupId.java} | 16 +- .../scheduler/CoLocationConstraint.java | 62 +- .../NoResourceAvailableException.java | 4 +- .../jobmanager/scheduler/ScheduledUnit.java | 106 +- .../jobmanager/scheduler/Scheduler.java | 38 +- .../scheduler/SlotSharingGroup.java | 11 +- .../jobmanager/slots/SlotAndLocality.java | 6 +- .../flink/runtime/jobmaster/JobMaster.java | 5 +- .../{instance => jobmaster}/LogicalSlot.java | 60 +- .../slots => jobmaster}/SlotContext.java | 15 +- .../slots => jobmaster}/SlotOwner.java | 4 +- .../runtime/jobmaster/SlotRequestId.java | 37 + .../slotpool}/AllocatedSlot.java | 170 +--- .../slotpool/AllocatedSlotActions.java | 48 + .../slotpool}/DualKeyMap.java | 12 +- .../jobmaster/slotpool/SingleLogicalSlot.java | 170 ++++ .../slotpool}/SlotPool.java | 640 +++++++++--- .../slotpool}/SlotPoolGateway.java | 83 +- .../slotpool}/SlotProvider.java | 3 +- .../slotpool/SlotSharingManager.java | 740 ++++++++++++++ .../CheckpointSettingsSerializableTest.java | 2 +- .../InputChannelDeploymentDescriptorTest.java | 2 +- .../ArchivedExecutionGraphTest.java | 2 +- .../ExecutionGraphDeploymentTest.java | 4 +- .../ExecutionGraphMetricsTest.java | 4 +- .../ExecutionGraphRestartTest.java | 2 +- .../ExecutionGraphSchedulingTest.java | 10 +- .../ExecutionGraphSuspendTest.java | 2 +- .../ExecutionGraphTestUtils.java | 8 +- 
.../runtime/executiongraph/ExecutionTest.java | 4 +- .../ExecutionVertexDeploymentTest.java | 4 +- .../ExecutionVertexLocalityTest.java | 10 +- .../ExecutionVertexSchedulingTest.java | 4 +- .../executiongraph/FailoverRegionTest.java | 2 +- .../IndividualRestartsConcurrencyTest.java | 2 +- .../executiongraph/LegacyJobVertexIdTest.java | 2 +- ...ipelinedRegionFailoverConcurrencyTest.java | 2 +- .../ProgrammedSlotProvider.java | 4 +- .../PipelinedFailoverRegionBuildingTest.java | 2 +- .../utils/SimpleSlotProvider.java | 12 +- .../flink/runtime/instance/InstanceTest.java | 8 +- .../runtime/instance/SharedSlotsTest.java | 42 +- .../runtime/instance/SimpleSlotTest.java | 5 +- .../SlotSharingGroupAssignmentTest.java | 2 +- .../scheduler/CoLocationConstraintTest.java | 2 +- .../ScheduleWithCoLocationHintTest.java | 953 ++++++++---------- .../scheduler/SchedulerIsolatedTasksTest.java | 519 ++++------ .../scheduler/SchedulerSlotSharingTest.java | 683 ++++++------- .../jobmanager/scheduler/SchedulerTest.java | 91 ++ .../scheduler/SchedulerTestBase.java | 416 ++++++++ .../scheduler/SchedulerTestUtils.java | 60 +- .../jobmanager/slots/DummySlotOwner.java | 3 +- .../jobmanager/slots/TestingSlotOwner.java | 3 +- .../TestingLogicalSlot.java | 32 +- .../TestingPayload.java | 2 +- .../slotpool}/AllocatedSlotsTest.java | 13 +- .../slotpool}/AvailableSlotsTest.java | 8 +- .../slotpool/SlotPoolCoLocationTest.java | 138 +++ .../slotpool}/SlotPoolRpcTest.java | 58 +- .../slotpool/SlotPoolSchedulingTestBase.java | 95 ++ .../slotpool/SlotPoolSlotSharingTest.java | 304 ++++++ .../slotpool}/SlotPoolTest.java | 115 ++- .../slotpool/SlotSharingManagerTest.java | 519 ++++++++++ .../slotpool/TestingAllocatedSlotActions.java | 52 + .../test/recovery/FastFailuresITCase.java | 2 + 77 files changed, 4804 insertions(+), 1831 deletions(-) rename flink-runtime/src/main/java/org/apache/flink/runtime/{jobmanager/slots => instance}/SimpleSlotContext.java (86%) rename 
flink-runtime/src/main/java/org/apache/flink/runtime/instance/{SlotRequestID.java => SlotSharingGroupId.java} (73%) rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster}/LogicalSlot.java (64%) rename flink-runtime/src/main/java/org/apache/flink/runtime/{jobmanager/slots => jobmaster}/SlotContext.java (80%) rename flink-runtime/src/main/java/org/apache/flink/runtime/{jobmanager/slots => jobmaster}/SlotOwner.java (92%) create mode 100644 flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotRequestId.java rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/AllocatedSlot.java (54%) create mode 100644 flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotActions.java rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/DualKeyMap.java (89%) create mode 100644 flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SingleLogicalSlot.java rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/SlotPool.java (63%) rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/SlotPoolGateway.java (55%) rename flink-runtime/src/main/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/SlotProvider.java (94%) create mode 100644 flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManager.java create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTest.java create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestBase.java rename flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster}/TestingLogicalSlot.java (79%) rename flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster}/TestingPayload.java (96%) rename 
flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/AllocatedSlotsTest.java (94%) rename flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/AvailableSlotsTest.java (95%) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolCoLocationTest.java rename flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/SlotPoolRpcTest.java (87%) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSchedulingTestBase.java create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSlotSharingTest.java rename flink-runtime/src/test/java/org/apache/flink/runtime/{instance => jobmaster/slotpool}/SlotPoolTest.java (87%) create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManagerTest.java create mode 100644 flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/TestingAllocatedSlotActions.java diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptor.java b/flink-runtime/src/main/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptor.java index 8d7620711ac6a..7c2b30db32ac3 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptor.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptor.java @@ -24,11 +24,11 @@ import org.apache.flink.runtime.executiongraph.ExecutionEdge; import org.apache.flink.runtime.executiongraph.ExecutionGraphException; import org.apache.flink.runtime.executiongraph.IntermediateResultPartition; -import org.apache.flink.runtime.instance.LogicalSlot; import org.apache.flink.runtime.io.network.ConnectionID; import org.apache.flink.runtime.io.network.partition.ResultPartitionID; import 
org.apache.flink.runtime.io.network.partition.consumer.InputChannel; import org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import java.io.Serializable; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/Execution.java b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/Execution.java index 12a6749486c5e..cc35060e7aeb1 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/Execution.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/Execution.java @@ -33,8 +33,9 @@ import org.apache.flink.runtime.deployment.ResultPartitionLocation; import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor; import org.apache.flink.runtime.execution.ExecutionState; -import org.apache.flink.runtime.instance.LogicalSlot; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.ConnectionID; import org.apache.flink.runtime.io.network.partition.ResultPartitionID; import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint; @@ -51,6 +52,8 @@ import org.slf4j.Logger; +import javax.annotation.Nullable; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -441,9 +444,11 @@ public CompletableFuture allocateAndAssignSlotForExecution( // this method only works if the execution is in the state 'CREATED' if (transitionState(CREATED, SCHEDULED)) { + final SlotSharingGroupId slotSharingGroupId = sharingGroup != null ? sharingGroup.getSlotSharingGroupId() : null; + ScheduledUnit toSchedule = locationConstraint == null ? 
- new ScheduledUnit(this, sharingGroup) : - new ScheduledUnit(this, sharingGroup, locationConstraint); + new ScheduledUnit(this, slotSharingGroupId) : + new ScheduledUnit(this, slotSharingGroupId, locationConstraint); // calculate the preferred locations final CompletableFuture> preferredLocationsFuture = calculatePreferredLocations(locationPreferenceConstraint); @@ -461,7 +466,7 @@ public CompletableFuture allocateAndAssignSlotForExecution( return this; } else { // release the slot - logicalSlot.releaseSlot(); + logicalSlot.releaseSlot(new FlinkException("Could not assign logical slot to execution " + this + '.')); throw new CompletionException(new FlinkException("Could not assign slot " + logicalSlot + " to execution " + this + " because it has already been assigned ")); } @@ -513,7 +518,7 @@ public void deploy() throws JobException { // race double check, did we fail/cancel and do we need to release the slot? if (this.state != DEPLOYING) { - slot.releaseSlot(); + slot.releaseSlot(new FlinkException("Actual state of execution " + this + " (" + state + ") does not match expected state DEPLOYING.")); return; } @@ -622,7 +627,7 @@ else if (current == CREATED || current == SCHEDULED) { try { vertex.getExecutionGraph().deregisterExecution(this); - releaseAssignedResource(); + releaseAssignedResource(new FlinkException("Execution " + this + " was cancelled.")); } finally { vertex.executionCanceled(this); @@ -890,7 +895,7 @@ void markFinished(Map> userAccumulators, IOMetrics met updateAccumulatorsAndMetrics(userAccumulators, metrics); - releaseAssignedResource(); + releaseAssignedResource(null); vertex.getExecutionGraph().deregisterExecution(this); } @@ -943,7 +948,7 @@ else if (current == CANCELING || current == RUNNING || current == DEPLOYING) { if (transitionState(current, CANCELED)) { try { - releaseAssignedResource(); + releaseAssignedResource(new FlinkException("Execution " + this + " was cancelled.")); vertex.getExecutionGraph().deregisterExecution(this); } @@ 
-1035,7 +1040,7 @@ private boolean processFail(Throwable t, boolean isCallback, Map { if (throwable != null) { releaseFuture.completeExceptionally(throwable); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraph.java b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraph.java index c4ff6fb8673a7..a02a687780bbd 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraph.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraph.java @@ -50,7 +50,7 @@ import org.apache.flink.runtime.executiongraph.restart.ExecutionGraphRestartCallback; import org.apache.flink.runtime.executiongraph.restart.RestartCallback; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionID; import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; import org.apache.flink.runtime.jobgraph.JobStatus; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java index 2a4315ddacb53..34ba3df5c1fab 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java @@ -43,7 +43,7 @@ import org.apache.flink.runtime.executiongraph.metrics.RestartTimeGauge; import org.apache.flink.runtime.executiongraph.metrics.UpTimeGauge; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobGraph; import 
org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionJobVertex.java b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionJobVertex.java index fff7ce1fa30c5..bb5ad28a1ea5d 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionJobVertex.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionJobVertex.java @@ -35,7 +35,7 @@ import org.apache.flink.runtime.blob.BlobWriter; import org.apache.flink.runtime.blob.PermanentBlobKey; import org.apache.flink.runtime.execution.ExecutionState; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.IntermediateDataSet; import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; import org.apache.flink.runtime.jobgraph.JobEdge; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionVertex.java b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionVertex.java index 27f2d5d95466d..cb4f2c8e56430 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionVertex.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionVertex.java @@ -32,9 +32,9 @@ import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor; import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor; import org.apache.flink.runtime.execution.ExecutionState; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import 
org.apache.flink.runtime.io.network.partition.ResultPartitionID; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Instance.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Instance.java index 44ee29d2049c0..0878f7580886f 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Instance.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Instance.java @@ -20,9 +20,11 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.jobmanager.scheduler.SlotAvailabilityListener; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; +import org.apache.flink.util.FlinkException; import org.apache.flink.util.Preconditions; import org.slf4j.Logger; @@ -163,8 +165,9 @@ public void markDead() { * owning the assignment group lock wants to give itself back to the instance which requires * the instance lock */ + final FlinkException cause = new FlinkException("Instance " + this + " has been marked as dead."); for (Slot slot : slots) { - slot.releaseInstanceSlot(); + slot.releaseSlot(cause); } } @@ -321,8 +324,9 @@ public void cancelAndReleaseAllSlots() { copy = new ArrayList(this.allocatedSlots); } + final FlinkException cause = new FlinkException("Cancel and release all slots of instance " + this + '.'); for (Slot slot : copy) { - slot.releaseInstanceSlot(); + slot.releaseSlot(cause); } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SharedSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SharedSlot.java index 
8c9fe1a4b29cc..d922d7c94288b 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SharedSlot.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SharedSlot.java @@ -19,12 +19,14 @@ package org.apache.flink.runtime.instance; import org.apache.flink.runtime.clusterframework.types.AllocationID; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.AbstractID; -import org.apache.flink.util.FlinkException; import javax.annotation.Nullable; @@ -55,8 +57,6 @@ public class SharedSlot extends Slot implements LogicalSlot { /** The set os sub-slots allocated from this shared slot */ private final Set subSlots; - private final CompletableFuture cancellationFuture = new CompletableFuture<>(); - // ------------------------------------------------------------------------ // Old Constructors (prior FLIP-6) // ------------------------------------------------------------------------ @@ -72,9 +72,9 @@ public class SharedSlot extends Slot implements LogicalSlot { * @param assignmentGroup The assignment group that this shared slot belongs to. 
*/ public SharedSlot( - SlotOwner owner, TaskManagerLocation location, int slotNumber, - TaskManagerGateway taskManagerGateway, - SlotSharingGroupAssignment assignmentGroup) { + SlotOwner owner, TaskManagerLocation location, int slotNumber, + TaskManagerGateway taskManagerGateway, + SlotSharingGroupAssignment assignmentGroup) { this(owner, location, slotNumber, taskManagerGateway, assignmentGroup, null, null); } @@ -174,6 +174,11 @@ public boolean hasChildren() { return subSlots.size() > 0; } + @Override + public Locality getLocality() { + return Locality.UNKNOWN; + } + @Override public boolean tryAssignPayload(Payload payload) { throw new UnsupportedOperationException("Cannot assign an execution attempt id to a shared slot."); @@ -186,9 +191,7 @@ public Payload getPayload() { } @Override - public CompletableFuture releaseSlot() { - cancellationFuture.completeExceptionally(new FlinkException("Shared slot " + this + " is being released.")); - + public CompletableFuture releaseSlot(@Nullable Throwable cause) { assignmentGroup.releaseSharedSlot(this); if (!(isReleased() && subSlots.isEmpty())) { @@ -198,11 +201,6 @@ public CompletableFuture releaseSlot() { return CompletableFuture.completedFuture(null); } - @Override - public void releaseInstanceSlot() { - releaseSlot(); - } - @Override public int getPhysicalSlotNumber() { return getRootSlotNumber(); @@ -214,8 +212,14 @@ public AllocationID getAllocationId() { } @Override - public SlotRequestID getSlotRequestId() { - return getSlotContext().getSlotRequestId(); + public SlotRequestId getSlotRequestId() { + return NO_SLOT_REQUEST_ID; + } + + @Nullable + @Override + public SlotSharingGroupId getSlotSharingGroupId() { + return NO_SLOT_SHARING_GROUP_ID; } /** diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlot.java index e98832f896a62..e69247ebc0f32 100644 --- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlot.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlot.java @@ -20,10 +20,11 @@ import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.jobmanager.scheduler.Locality; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.AbstractID; import org.apache.flink.util.FlinkException; @@ -69,8 +70,8 @@ public class SimpleSlot extends Slot implements LogicalSlot { * @param taskManagerGateway The gateway to communicate with the TaskManager of this slot */ public SimpleSlot( - SlotOwner owner, TaskManagerLocation location, int slotNumber, - TaskManagerGateway taskManagerGateway) { + SlotOwner owner, TaskManagerLocation location, int slotNumber, + TaskManagerGateway taskManagerGateway) { this(owner, location, slotNumber, taskManagerGateway, null, null); } @@ -97,7 +98,6 @@ public SimpleSlot( parent != null ? 
parent.getSlotContext() : new SimpleSlotContext( - NO_SLOT_REQUEST_ID, NO_ALLOCATION_ID, location, slotNumber, @@ -218,18 +218,13 @@ public void setLocality(Locality locality) { // ------------------------------------------------------------------------ @Override - public void releaseInstanceSlot() { - releaseSlot(); - } - - @Override - public CompletableFuture releaseSlot() { + public CompletableFuture releaseSlot(@Nullable Throwable cause) { if (!isCanceled()) { final CompletableFuture terminationFuture; if (payload != null) { // trigger the failure of the slot payload - payload.fail(new FlinkException("TaskManager was lost/killed: " + getTaskManagerLocation())); + payload.fail(cause != null ? cause : new FlinkException("TaskManager was lost/killed: " + getTaskManagerLocation())); // wait for the termination of the payload before releasing the slot terminationFuture = payload.getTerminalStateFuture(); @@ -276,8 +271,14 @@ public AllocationID getAllocationId() { } @Override - public SlotRequestID getSlotRequestId() { - return getSlotContext().getSlotRequestId(); + public SlotRequestId getSlotRequestId() { + return NO_SLOT_REQUEST_ID; + } + + @Nullable + @Override + public SlotSharingGroupId getSlotSharingGroupId() { + return NO_SLOT_SHARING_GROUP_ID; } // ------------------------------------------------------------------------ diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SimpleSlotContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlotContext.java similarity index 86% rename from flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SimpleSlotContext.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlotContext.java index a5b75d74fdc6d..95dd1f6f9e6e4 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SimpleSlotContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SimpleSlotContext.java @@ 
-16,10 +16,11 @@ * limitations under the License. */ -package org.apache.flink.runtime.jobmanager.slots; +package org.apache.flink.runtime.instance; import org.apache.flink.runtime.clusterframework.types.AllocationID; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.Preconditions; @@ -28,8 +29,6 @@ */ public class SimpleSlotContext implements SlotContext { - private final SlotRequestID slotRequestId; - private final AllocationID allocationId; private final TaskManagerLocation taskManagerLocation; @@ -39,23 +38,16 @@ public class SimpleSlotContext implements SlotContext { private final TaskManagerGateway taskManagerGateway; public SimpleSlotContext( - SlotRequestID slotRequestId, AllocationID allocationId, TaskManagerLocation taskManagerLocation, int physicalSlotNumber, TaskManagerGateway taskManagerGateway) { - this.slotRequestId = Preconditions.checkNotNull(slotRequestId); this.allocationId = Preconditions.checkNotNull(allocationId); this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation); this.physicalSlotNumber = physicalSlotNumber; this.taskManagerGateway = Preconditions.checkNotNull(taskManagerGateway); } - @Override - public SlotRequestID getSlotRequestId() { - return slotRequestId; - } - @Override public AllocationID getAllocationId() { return allocationId; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Slot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Slot.java index e82f0758321df..dbd655431fa3b 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Slot.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/Slot.java @@ -20,15 +20,16 @@ import org.apache.flink.runtime.clusterframework.types.AllocationID; import 
org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.AbstractID; import javax.annotation.Nullable; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import static org.apache.flink.util.Preconditions.checkArgument; @@ -62,7 +63,8 @@ public abstract class Slot { // temporary placeholder for Slots that are not constructed from an AllocatedSlot (prior to FLIP-6) protected static final AllocationID NO_ALLOCATION_ID = new AllocationID(0L, 0L); - protected static final SlotRequestID NO_SLOT_REQUEST_ID = new SlotRequestID(0L, 0L); + protected static final SlotRequestId NO_SLOT_REQUEST_ID = new SlotRequestId(0L, 0L); + protected static final SlotSharingGroupId NO_SLOT_SHARING_GROUP_ID = new SlotSharingGroupId(0L, 0L); // ------------------------------------------------------------------------ @@ -112,7 +114,6 @@ protected Slot( // create a simple slot context this.slotContext = new SimpleSlotContext( - NO_SLOT_REQUEST_ID, NO_ALLOCATION_ID, location, slotNumber, @@ -333,7 +334,7 @@ final boolean markReleased() { * If this slot is a simple slot, it will be returned to its instance. If it is a shared slot, * it will release all of its sub-slots and release itself. 
*/ - public abstract void releaseInstanceSlot(); + public abstract CompletableFuture releaseSlot(@Nullable Throwable cause); // -------------------------------------------------------------------------------------------- diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignment.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignment.java index 45b4a969a6233..289762c82e198 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignment.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignment.java @@ -18,28 +18,29 @@ package org.apache.flink.runtime.instance; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.runtime.clusterframework.types.ResourceID; +import org.apache.flink.runtime.executiongraph.ExecutionVertex; +import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint; import org.apache.flink.runtime.jobmanager.scheduler.Locality; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.AbstractID; -import org.apache.flink.runtime.executiongraph.ExecutionVertex; -import org.apache.flink.runtime.jobgraph.JobVertexID; +import org.apache.flink.util.FlinkException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + /** * The SlotSharingGroupAssignment manages a set of shared 
slots, which are shared between @@ -215,7 +216,7 @@ private SimpleSlot addSharedSlotAndAllocateSubSlot( // note that this does implicitly release the slot we have just added // as well, because we release its last child slot. That is expected // and desired. - constraintGroupSlot.releaseInstanceSlot(); + constraintGroupSlot.releaseSlot(new FlinkException("Could not create a sub slot in this shared slot.")); } } else { @@ -273,7 +274,7 @@ private SimpleSlot addSharedSlotAndAllocateSubSlot( */ public SimpleSlot getSlotForTask(JobVertexID vertexID, Iterable locationPreferences) { synchronized (lock) { - Tuple2 p = getSlotForTaskInternal(vertexID, locationPreferences, false); + Tuple2 p = getSharedSlotForTask(vertexID, locationPreferences, false); if (p != null) { SharedSlot ss = p.f0; @@ -324,7 +325,7 @@ else if (constraint.isAssigned()) { } TaskManagerLocation location = previous.getTaskManagerLocation(); - Tuple2 p = getSlotForTaskInternal( + Tuple2 p = getSharedSlotForTask( constraint.getGroupId(), Collections.singleton(location), true); if (p == null) { @@ -355,7 +356,7 @@ else if (constraint.isAssigned()) { // grab a new slot and initialize the constraint with that one. 
// preferred locations are defined by the vertex Tuple2 p = - getSlotForTaskInternal(constraint.getGroupId(), locationPreferences, false); + getSharedSlotForTask(constraint.getGroupId(), locationPreferences, false); if (p == null) { // could not get a shared slot for this co-location-group return null; @@ -382,9 +383,10 @@ else if (constraint.isAssigned()) { } - private Tuple2 getSlotForTaskInternal( - AbstractID groupId, Iterable preferredLocations, boolean localOnly) - { + public Tuple2 getSharedSlotForTask( + AbstractID groupId, + Iterable preferredLocations, + boolean localOnly) { // check if there is anything at all in this group assignment if (allSlots.isEmpty()) { return null; @@ -507,7 +509,7 @@ void releaseSimpleSlot(SimpleSlot simpleSlot) { } /** - * Called from {@link org.apache.flink.runtime.instance.SharedSlot#releaseInstanceSlot()}. + * Called from {@link org.apache.flink.runtime.instance.SharedSlot#releaseSlot(Throwable)}. * * @param sharedSlot The slot to be released. */ @@ -517,10 +519,11 @@ void releaseSharedSlot(SharedSlot sharedSlot) { // we are releasing this slot if (sharedSlot.hasChildren()) { + final FlinkException cause = new FlinkException("Releasing shared slot parent."); // by simply releasing all children, we should eventually release this slot. 
Set children = sharedSlot.getSubSlots(); while (children.size() > 0) { - children.iterator().next().releaseInstanceSlot(); + children.iterator().next().releaseSlot(cause); } } else { diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotRequestID.java b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupId.java similarity index 73% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotRequestID.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupId.java index 8e199441cdaf3..e5d4467b641b3 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotRequestID.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotSharingGroupId.java @@ -20,15 +20,13 @@ import org.apache.flink.util.AbstractID; -/** - * Request ID identifying different slot requests. - */ -public final class SlotRequestID extends AbstractID { - private static final long serialVersionUID = -6072105912250154283L; +public class SlotSharingGroupId extends AbstractID { + private static final long serialVersionUID = 8837647978345422042L; - public SlotRequestID(long lowerPart, long upperPart) { - super(lowerPart, upperPart); - } + public SlotSharingGroupId(long lowerPart, long upperPart) { + super(lowerPart, upperPart); + } - public SlotRequestID() {} + public SlotSharingGroupId() { + } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraint.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraint.java index ffc1a7c82c8ef..baa452f6d122a 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraint.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraint.java @@ -18,16 +18,20 @@ package org.apache.flink.runtime.jobmanager.scheduler; -import 
org.apache.flink.runtime.clusterframework.types.ResourceID; +import org.apache.flink.runtime.instance.Instance; +import org.apache.flink.runtime.instance.SharedSlot; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.AbstractID; -import org.apache.flink.runtime.instance.Instance; - +import org.apache.flink.util.FlinkException; import org.apache.flink.util.Preconditions; -import org.apache.flink.runtime.instance.SharedSlot; -import static org.apache.flink.util.Preconditions.checkState; +import javax.annotation.Nullable; + +import java.util.Objects; + import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; /** * A CoLocationConstraint manages the location of a set of tasks @@ -43,12 +47,14 @@ public class CoLocationConstraint { private volatile SharedSlot sharedSlot; - private volatile ResourceID lockedLocation; + private volatile TaskManagerLocation lockedLocation; + private volatile SlotRequestId slotRequestId; CoLocationConstraint(CoLocationGroup group) { Preconditions.checkNotNull(group); this.group = group; + this.slotRequestId = null; } // ------------------------------------------------------------------------ @@ -107,7 +113,7 @@ public boolean isAssignedAndAlive() { */ public TaskManagerLocation getLocation() { if (lockedLocation != null) { - return sharedSlot.getTaskManagerLocation(); + return lockedLocation; } else { throw new IllegalStateException("Location not yet locked"); } @@ -136,12 +142,12 @@ public void setSharedSlot(SharedSlot newSlot) { this.sharedSlot = newSlot; } else if (newSlot != this.sharedSlot){ - if (lockedLocation != null && lockedLocation != newSlot.getTaskManagerID()) { + if (lockedLocation != null && !Objects.equals(lockedLocation, newSlot.getTaskManagerLocation())) { throw new IllegalArgumentException( "Cannot assign different location to a constraint whose location 
is locked."); } if (this.sharedSlot.isAlive()) { - this.sharedSlot.releaseInstanceSlot(); + this.sharedSlot.releaseSlot(new FlinkException("Setting new shared slot for co-location constraint.")); } this.sharedSlot = newSlot; @@ -159,7 +165,43 @@ public void lockLocation() throws IllegalStateException { checkState(lockedLocation == null, "Location is already locked"); checkState(sharedSlot != null, "Cannot lock location without a slot."); - lockedLocation = sharedSlot.getTaskManagerID(); + lockedLocation = sharedSlot.getTaskManagerLocation(); + } + + /** + * Locks the location of this slot. The location can be locked only once + * and only after a shared slot has been assigned. + * + *

Note: This method exists for compatibility reasons with the Flip-6 SlotPool + * + * @param taskManagerLocation to lock this co-location constraint to + */ + public void lockLocation(TaskManagerLocation taskManagerLocation) { + checkNotNull(taskManagerLocation); + checkState(lockedLocation == null, "Location is already locked."); + + lockedLocation = taskManagerLocation; + } + + /** + * Sets the slot request id of the currently assigned slot to the co-location constraint. + * All other tasks belonging to this co-location constraint will be deployed to the same slot. + * + * @param slotRequestId identifying the assigned slot for this co-location constraint + */ + public void setSlotRequestId(@Nullable SlotRequestId slotRequestId) { + this.slotRequestId = slotRequestId; + } + + /** + * Returns the currently assigned slot request id identifying the slot to which tasks + * belonging to this co-location constraint will be deployed to. + * + * @return Slot request id of the assigned slot or null if none + */ + @Nullable + public SlotRequestId getSlotRequestId() { + return slotRequestId; } // ------------------------------------------------------------------------ diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/NoResourceAvailableException.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/NoResourceAvailableException.java index 546f31fcd42c2..e1c1657af44de 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/NoResourceAvailableException.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/NoResourceAvailableException.java @@ -44,8 +44,8 @@ public NoResourceAvailableException(int numInstances, int numSlotsTotal, int ava NoResourceAvailableException(ScheduledUnit task, int numInstances, int numSlotsTotal, int availableSlots) { super(String.format("%s Task to schedule: < %s > with groupID < %s > in sharing group < %s >. 
Resources available to scheduler: Number of instances=%d, total number of slots=%d, available slots=%d", BASE_MESSAGE, task.getTaskToExecute(), - task.getLocationConstraint() == null ? task.getTaskToExecute().getVertex().getJobvertexId() : task.getLocationConstraint().getGroupId(), - task.getSlotSharingGroup(), + task.getCoLocationConstraint() == null ? task.getTaskToExecute().getVertex().getJobvertexId() : task.getCoLocationConstraint().getGroupId(), + task.getSlotSharingGroupId(), numInstances, numSlotsTotal, availableSlots)); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduledUnit.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduledUnit.java index 7348c9d31ea71..903872b378de4 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduledUnit.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduledUnit.java @@ -19,68 +19,108 @@ package org.apache.flink.runtime.jobmanager.scheduler; import org.apache.flink.runtime.executiongraph.Execution; +import org.apache.flink.runtime.instance.SlotSharingGroupId; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.util.Preconditions; +import javax.annotation.Nullable; + +/** + * ScheduledUnit contains the information necessary to allocate a slot for the given + * {@link JobVertexID}. 
+ */ public class ScheduledUnit { - + + @Nullable private final Execution vertexExecution; - - private final SlotSharingGroup sharingGroup; - - private final CoLocationConstraint locationConstraint; + + private final JobVertexID jobVertexId; + + @Nullable + private final SlotSharingGroupId slotSharingGroupId; + + @Nullable + private final CoLocationConstraint coLocationConstraint; // -------------------------------------------------------------------------------------------- public ScheduledUnit(Execution task) { - Preconditions.checkNotNull(task); - - this.vertexExecution = task; - this.sharingGroup = null; - this.locationConstraint = null; + this( + Preconditions.checkNotNull(task), + task.getVertex().getJobvertexId(), + null, + null); } - public ScheduledUnit(Execution task, SlotSharingGroup sharingUnit) { - Preconditions.checkNotNull(task); - - this.vertexExecution = task; - this.sharingGroup = sharingUnit; - this.locationConstraint = null; + public ScheduledUnit(Execution task, @Nullable SlotSharingGroupId slotSharingGroupId) { + this( + Preconditions.checkNotNull(task), + task.getVertex().getJobvertexId(), + slotSharingGroupId, + null); } - public ScheduledUnit(Execution task, SlotSharingGroup sharingUnit, CoLocationConstraint locationConstraint) { - Preconditions.checkNotNull(task); - Preconditions.checkNotNull(sharingUnit); - Preconditions.checkNotNull(locationConstraint); - + public ScheduledUnit( + Execution task, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable CoLocationConstraint coLocationConstraint) { + this( + Preconditions.checkNotNull(task), + task.getVertex().getJobvertexId(), + slotSharingGroupId, + coLocationConstraint); + } + + public ScheduledUnit( + JobVertexID jobVertexId, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable CoLocationConstraint coLocationConstraint) { + this( + null, + jobVertexId, + slotSharingGroupId, + coLocationConstraint); + } + + public ScheduledUnit( + @Nullable Execution task, + JobVertexID 
jobVertexId, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable CoLocationConstraint coLocationConstraint) { + this.vertexExecution = task; - this.sharingGroup = sharingUnit; - this.locationConstraint = locationConstraint; + this.jobVertexId = Preconditions.checkNotNull(jobVertexId); + this.slotSharingGroupId = slotSharingGroupId; + this.coLocationConstraint = coLocationConstraint; + } // -------------------------------------------------------------------------------------------- public JobVertexID getJobVertexId() { - return this.vertexExecution.getVertex().getJobvertexId(); + return jobVertexId; } - + + @Nullable public Execution getTaskToExecute() { return vertexExecution; } - - public SlotSharingGroup getSlotSharingGroup() { - return sharingGroup; + + @Nullable + public SlotSharingGroupId getSlotSharingGroupId() { + return slotSharingGroupId; } - - public CoLocationConstraint getLocationConstraint() { - return locationConstraint; + + @Nullable + public CoLocationConstraint getCoLocationConstraint() { + return coLocationConstraint; } // -------------------------------------------------------------------------------------------- @Override public String toString() { - return "{task=" + vertexExecution.getVertexWithAttempt() + ", sharingUnit=" + sharingGroup + - ", locationConstraint=" + locationConstraint + '}'; + return "{task=" + vertexExecution.getVertexWithAttempt() + ", sharingUnit=" + slotSharingGroupId + + ", locationConstraint=" + coLocationConstraint + '}'; } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/Scheduler.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/Scheduler.java index a3c38e05cc3b6..40fb760db2880 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/Scheduler.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/Scheduler.java @@ -18,20 +18,22 @@ package 
org.apache.flink.runtime.jobmanager.scheduler; +import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.concurrent.FutureUtils; import org.apache.flink.runtime.executiongraph.ExecutionVertex; import org.apache.flink.runtime.instance.Instance; import org.apache.flink.runtime.instance.InstanceDiedException; import org.apache.flink.runtime.instance.InstanceListener; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SharedSlot; import org.apache.flink.runtime.instance.SimpleSlot; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.instance.SlotSharingGroupAssignment; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.ExceptionUtils; +import org.apache.flink.util.FlinkException; import org.apache.flink.util.Preconditions; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -39,6 +41,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; @@ -49,6 +53,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Queue; import java.util.Set; import java.util.concurrent.BlockingQueue; @@ -177,7 +182,7 @@ private Object scheduleTask(ScheduledUnit task, boolean queueIfNoResource, Itera synchronized (globalLock) { - SlotSharingGroup sharingUnit = task.getSlotSharingGroup(); + SlotSharingGroup sharingUnit = vertex.getJobVertex().getSlotSharingGroup(); if (sharingUnit != null) { @@ -189,7 +194,7 @@ private Object scheduleTask(ScheduledUnit task, boolean queueIfNoResource, Itera } final 
SlotSharingGroupAssignment assignment = sharingUnit.getTaskAssignment(); - final CoLocationConstraint constraint = task.getLocationConstraint(); + final CoLocationConstraint constraint = task.getCoLocationConstraint(); // sanity check that we do not use an externally forced location and a co-location constraint together if (constraint != null && forceExternalLocation) { @@ -274,7 +279,7 @@ else if (slotFromGroup == null || !slotFromGroup.isAlive() || newSlot.getLocalit // if there is no slot from the group, or the new slot is local, // then we use the new slot if (slotFromGroup != null) { - slotFromGroup.releaseInstanceSlot(); + slotFromGroup.releaseSlot(null); } toUse = newSlot; } @@ -282,7 +287,7 @@ else if (slotFromGroup == null || !slotFromGroup.isAlive() || newSlot.getLocalit // both are available and usable. neither is local. in that case, we may // as well use the slot from the sharing group, to minimize the number of // instances that the job occupies - newSlot.releaseInstanceSlot(); + newSlot.releaseSlot(null); toUse = slotFromGroup; } @@ -299,10 +304,10 @@ else if (slotFromGroup == null || !slotFromGroup.isAlive() || newSlot.getLocalit } catch (Throwable t) { if (slotFromGroup != null) { - slotFromGroup.releaseInstanceSlot(); + slotFromGroup.releaseSlot(t); } if (newSlot != null) { - newSlot.releaseInstanceSlot(); + newSlot.releaseSlot(t); } ExceptionUtils.rethrow(t, "An error occurred while allocating a slot in a sharing group"); @@ -444,7 +449,7 @@ protected SimpleSlot getNewSlotForSharingGroup(ExecutionVertex vertex, } else { // could not add and allocate the sub-slot, so release shared slot - sharedSlot.releaseInstanceSlot(); + sharedSlot.releaseSlot(new FlinkException("Could not allocate sub-slot.")); } } } @@ -854,4 +859,19 @@ public CompletableFuture getFuture() { return future; } } + + // ------------------------------------------------------------------------ + // Testing methods + // 
------------------------------------------------------------------------ + + @VisibleForTesting + @Nullable + public Instance getInstance(ResourceID resourceId) { + for (Instance instance : allInstances) { + if (Objects.equals(resourceId, instance.getTaskManagerID())) { + return instance; + } + } + return null; + } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/SlotSharingGroup.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/SlotSharingGroup.java index 0fa13629ad116..86be9d4e6bba2 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/SlotSharingGroup.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/scheduler/SlotSharingGroup.java @@ -23,6 +23,7 @@ import java.util.TreeSet; import org.apache.flink.runtime.instance.SlotSharingGroupAssignment; +import org.apache.flink.runtime.instance.SlotSharingGroupId; import org.apache.flink.runtime.jobgraph.JobVertexID; /** @@ -39,7 +40,8 @@ public class SlotSharingGroup implements java.io.Serializable { /** Mapping of tasks to subslots. This field is only needed inside the JobManager, and is not RPCed. 
*/ private transient SlotSharingGroupAssignment taskAssignment; - + + private final SlotSharingGroupId slotSharingGroupId = new SlotSharingGroupId(); public SlotSharingGroup() {} @@ -62,8 +64,11 @@ public void removeVertexFromGroup(JobVertexID id) { public Set getJobVertexIds() { return Collections.unmodifiableSet(ids); } - - + + public SlotSharingGroupId getSlotSharingGroupId() { + return slotSharingGroupId; + } + public SlotSharingGroupAssignment getTaskAssignment() { if (this.taskAssignment == null) { this.taskAssignment = new SlotSharingGroupAssignment(); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotAndLocality.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotAndLocality.java index 5ae057da2187f..85871c89987cd 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotAndLocality.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotAndLocality.java @@ -18,7 +18,7 @@ package org.apache.flink.runtime.jobmanager.slots; -import org.apache.flink.runtime.instance.AllocatedSlot; +import org.apache.flink.runtime.jobmaster.slotpool.AllocatedSlot; import org.apache.flink.runtime.jobmanager.scheduler.Locality; import static org.apache.flink.util.Preconditions.checkNotNull; @@ -39,11 +39,11 @@ public SlotAndLocality(AllocatedSlot slot, Locality locality) { // ------------------------------------------------------------------------ - public AllocatedSlot slot() { + public AllocatedSlot getSlot() { return slot; } - public Locality locality() { + public Locality getLocality() { return locality; } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java index 324557fbbe37a..7a2844d8d2588 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java +++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/JobMaster.java @@ -55,9 +55,8 @@ import org.apache.flink.runtime.heartbeat.HeartbeatServices; import org.apache.flink.runtime.heartbeat.HeartbeatTarget; import org.apache.flink.runtime.highavailability.HighAvailabilityServices; -import org.apache.flink.runtime.instance.LogicalSlot; -import org.apache.flink.runtime.instance.SlotPool; -import org.apache.flink.runtime.instance.SlotPoolGateway; +import org.apache.flink.runtime.jobmaster.slotpool.SlotPool; +import org.apache.flink.runtime.jobmaster.slotpool.SlotPoolGateway; import org.apache.flink.runtime.io.network.partition.ResultPartitionID; import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; import org.apache.flink.runtime.jobgraph.JobGraph; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/LogicalSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/LogicalSlot.java similarity index 64% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/LogicalSlot.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/LogicalSlot.java index b3104ac03592c..4511bf647c6db 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/LogicalSlot.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/LogicalSlot.java @@ -16,9 +16,11 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster; import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; @@ -32,8 +34,22 @@ */ public interface LogicalSlot { + Payload TERMINATED_PAYLOAD = new Payload() { + + private final CompletableFuture completedTerminationFuture = CompletableFuture.completedFuture(null); + @Override + public void fail(Throwable cause) { + // ignore + } + + @Override + public CompletableFuture getTerminalStateFuture() { + return completedTerminationFuture; + } + }; + /** - * Return the TaskManager location of this slot + * Return the TaskManager location of this slot. * * @return TaskManager location of this slot */ @@ -47,18 +63,25 @@ public interface LogicalSlot { TaskManagerGateway getTaskManagerGateway(); /** - * True if the slot is still alive. + * Gets the locality of this slot. * - * @return True if the slot is still alive, otherwise false + * @return locality of this slot + */ + Locality getLocality(); + + /** + * True if the slot is alive and has not been released. + * + * @return True if the slot is alive, otherwise false if the slot is released */ boolean isAlive(); /** - * Tries to assign a payload to this slot. This can only happens - * exactly once. + * Tries to assign a payload to this slot. One can only assign a single + * payload once. * * @param payload to be assigned to this slot. 
- * @return true if the payload could be set, otherwise false + * @return true if the payload could be assigned, otherwise false */ boolean tryAssignPayload(Payload payload); @@ -75,8 +98,19 @@ public interface LogicalSlot { * * @return Future which is completed once the slot has been released, * in case of a failure it is completed exceptionally + * @deprecated Added because extended the actual releaseSlot method with cause parameter. + */ + default CompletableFuture releaseSlot() { + return releaseSlot(null); + } + + /** + * Releases this slot. + * + * @param cause why the slot was released or null if none + * @return future which is completed once the slot has been released */ - CompletableFuture releaseSlot(); + CompletableFuture releaseSlot(@Nullable Throwable cause); /** * Gets the slot number on the TaskManager. @@ -98,7 +132,15 @@ public interface LogicalSlot { * * @return Unique id identifying the slot request with which this slot was allocated */ - SlotRequestID getSlotRequestId(); + SlotRequestId getSlotRequestId(); + + /** + * Gets the slot sharing group id to which this slot belongs. + * + * @return slot sharing group id of this slot or null, if none. + */ + @Nullable + SlotSharingGroupId getSlotSharingGroupId(); /** * Payload for a logical slot. diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotContext.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotContext.java similarity index 80% rename from flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotContext.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotContext.java index 1e0317a1f75ce..65bf2a1a8efc0 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotContext.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotContext.java @@ -16,27 +16,18 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.jobmanager.slots; +package org.apache.flink.runtime.jobmaster; import org.apache.flink.runtime.clusterframework.types.AllocationID; -import org.apache.flink.runtime.instance.Slot; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; /** - * Interface for the context of a logical {@link Slot}. This context contains information + * Interface for the context of a {@link LogicalSlot}. This context contains information * about the underlying allocated slot and how to communicate with the TaskManager on which * it was allocated. */ public interface SlotContext { - - /** - * Gets the slot request id under which the slot has been requested. This id uniquely identifies the logical slot. - * - * @return The id under which the slot has been requested - */ - SlotRequestID getSlotRequestId(); - /** * Gets the id under which the slot has been allocated on the TaskManager. This id uniquely identifies the * physical slot. diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotOwner.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotOwner.java similarity index 92% rename from flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotOwner.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotOwner.java index bc1ced46807f7..9cc6f813d60c8 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmanager/slots/SlotOwner.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotOwner.java @@ -16,9 +16,7 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.jobmanager.slots; - -import org.apache.flink.runtime.instance.LogicalSlot; +package org.apache.flink.runtime.jobmaster; import java.util.concurrent.CompletableFuture; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotRequestId.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotRequestId.java new file mode 100644 index 0000000000000..d3fa775a3100d --- /dev/null +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/SlotRequestId.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster; + +import org.apache.flink.runtime.jobmaster.slotpool.SlotPool; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; +import org.apache.flink.util.AbstractID; + +/** + * Request id identifying slot requests made by the {@link SlotProvider} towards the + * {@link SlotPool}. 
+ */ +public final class SlotRequestId extends AbstractID { + private static final long serialVersionUID = -6072105912250154283L; + + public SlotRequestId(long lowerPart, long upperPart) { + super(lowerPart, upperPart); + } + + public SlotRequestId() {} +} diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/AllocatedSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlot.java similarity index 54% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/AllocatedSlot.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlot.java index 97be59245c763..a560ebc7c7ed5 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/AllocatedSlot.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlot.java @@ -16,36 +16,32 @@ * limitations under the License. */ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; -import org.apache.flink.runtime.jobmanager.scheduler.Locality; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotException; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.SlotContext; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; -import org.apache.flink.util.Preconditions; import java.util.concurrent.atomic.AtomicReference; import static org.apache.flink.util.Preconditions.checkNotNull; /** - * The {@code AllocatedSlot} represents a slot that the JobManager allocated from a TaskManager. 
- * It represents a slice of allocated resources from the TaskManager. + * The {@code AllocatedSlot} represents a slot that the JobMaster allocated from a TaskExecutor. + * It represents a slice of allocated resources from the TaskExecutor. * *

To allocate an {@code AllocatedSlot}, the JobMaster requests a slot from the ResourceManager. The - * ResourceManager picks (or starts) a TaskManager that will then allocate the slot to the - * JobManager and notify the JobManager. + * ResourceManager picks (or starts) a TaskExecutor that will then allocate the slot to the + * JobMaster and notify the JobMaster. * *

Note: Prior to the resource management changes introduced in (Flink Improvement Proposal 6), * an AllocatedSlot was allocated to the JobManager as soon as the TaskManager registered at the * JobManager. All slots had a default unknown resource profile. */ -public class AllocatedSlot { +public class AllocatedSlot implements SlotContext { /** The ID under which the slot is allocated. Uniquely identifies the slot. */ private final AllocationID allocationId; @@ -62,9 +58,7 @@ public class AllocatedSlot { /** The number of the slot on the TaskManager to which slot belongs. Purely informational. */ private final int physicalSlotNumber; - private final SlotOwner slotOwner; - - private final AtomicReference logicalSlotReference; + private final AtomicReference payloadReference; // ------------------------------------------------------------------------ @@ -73,16 +67,14 @@ public AllocatedSlot( TaskManagerLocation location, int physicalSlotNumber, ResourceProfile resourceProfile, - TaskManagerGateway taskManagerGateway, - SlotOwner slotOwner) { + TaskManagerGateway taskManagerGateway) { this.allocationId = checkNotNull(allocationId); this.taskManagerLocation = checkNotNull(location); this.physicalSlotNumber = physicalSlotNumber; this.resourceProfile = checkNotNull(resourceProfile); this.taskManagerGateway = checkNotNull(taskManagerGateway); - this.slotOwner = checkNotNull(slotOwner); - logicalSlotReference = new AtomicReference<>(null); + payloadReference = new AtomicReference<>(null); } // ------------------------------------------------------------------------ @@ -137,91 +129,55 @@ public TaskManagerGateway getTaskManagerGateway() { } /** - * Returns true if this slot is not being used (e.g. a logical slot is allocated from this slot). + * Returns the physical slot number of the allocated slot. The physical slot number corresponds + * to the slot index on the TaskExecutor. 
* - * @return true if a logical slot is allocated from this slot, otherwise false - */ - public boolean isUsed() { - return logicalSlotReference.get() != null; - } - - /** - * Triggers the release of the logical slot. + * @return Physical slot number of the allocated slot */ - public void triggerLogicalSlotRelease() { - final LogicalSlot logicalSlot = logicalSlotReference.get(); - - if (logicalSlot != null) { - logicalSlot.releaseSlot(); - } + public int getPhysicalSlotNumber() { + return physicalSlotNumber; } /** - * Releases the logical slot. + * Returns true if this slot is not being used (e.g. a logical slot is allocated from this slot). * - * @return true if the logical slot could be released, false otherwise. + * @return true if a logical slot is allocated from this slot, otherwise false */ - public boolean releaseLogicalSlot() { - final LogicalSlot logicalSlot = logicalSlotReference.get(); - - if (logicalSlot != null) { - if (logicalSlot instanceof Slot) { - final Slot slot = (Slot) logicalSlot; - if (slot.markReleased()) { - logicalSlotReference.set(null); - return true; - } - } else { - throw new RuntimeException("Unsupported logical slot type encountered " + logicalSlot.getClass()); - } - - } - - return false; + public boolean isUsed() { + return payloadReference.get() != null; } /** - * Allocates a logical {@link SimpleSlot}. + * Tries to assign the given payload to this allocated slot. This only works if there has not + * been another payload assigned to this slot. 
* - * @param slotRequestId identifying the corresponding slot request - * @param locality specifying the locality of the allocated slot - * @return an allocated logical simple slot - * @throws SlotException if we could not allocate a simple slot + * @param payload to assign to this slot + * @return true if the payload could be assigned, otherwise false */ - public SimpleSlot allocateSimpleSlot(SlotRequestID slotRequestId, Locality locality) throws SlotException { - final AllocatedSlotContext allocatedSlotContext = new AllocatedSlotContext( - slotRequestId); - - final SimpleSlot simpleSlot = new SimpleSlot(allocatedSlotContext, slotOwner, physicalSlotNumber); - - if (logicalSlotReference.compareAndSet(null, simpleSlot)) { - simpleSlot.setLocality(locality); - return simpleSlot; - } else { - throw new SlotException("Could not allocate logical simple slot because the allocated slot is already used."); - } + public boolean tryAssignPayload(Payload payload) { + return payloadReference.compareAndSet(null, payload); } /** - * Allocates a logical {@link SharedSlot}. + * Triggers the release of the assigned payload. If the payload could be released, + * then it is removed from the slot. 
* - * @param slotRequestId identifying the corresponding slot request - * @param slotSharingGroupAssignment the slot sharing group to which the shared slot shall belong - * @return an allocated logical shared slot - * @throws SlotException if we could not allocate a shared slot + * @param cause of the release operation + * @return true if the payload could be released and was removed from the slot, otherwise false */ - public SharedSlot allocateSharedSlot(SlotRequestID slotRequestId, SlotSharingGroupAssignment slotSharingGroupAssignment) throws SlotException { - - final AllocatedSlotContext allocatedSlotContext = new AllocatedSlotContext( - slotRequestId); - final SharedSlot sharedSlot = new SharedSlot(allocatedSlotContext, slotOwner, slotSharingGroupAssignment); + public boolean releasePayload(Throwable cause) { + final Payload payload = payloadReference.get(); - if (logicalSlotReference.compareAndSet(null, sharedSlot)) { + if (payload != null) { + if (payload.release(cause)) { + payloadReference.set(null); - - return sharedSlot; + return true; + } else { + return false; + } } else { - throw new SlotException("Could not allocate logical shared slot because the allocated slot is already used."); + return true; } } @@ -248,40 +204,22 @@ public String toString() { return "AllocatedSlot " + allocationId + " @ " + taskManagerLocation + " - " + physicalSlotNumber; } + // ----------------------------------------------------------------------- + // Interfaces + // ----------------------------------------------------------------------- + /** - * Slot context for {@link AllocatedSlot}. + * Payload which can be assigned to an {@link AllocatedSlot}. 
*/ - private final class AllocatedSlotContext implements SlotContext { - - private final SlotRequestID slotRequestId; - - private AllocatedSlotContext(SlotRequestID slotRequestId) { - this.slotRequestId = Preconditions.checkNotNull(slotRequestId); - } - - @Override - public SlotRequestID getSlotRequestId() { - return slotRequestId; - } - - @Override - public AllocationID getAllocationId() { - return allocationId; - } - - @Override - public TaskManagerLocation getTaskManagerLocation() { - return taskManagerLocation; - } - - @Override - public int getPhysicalSlotNumber() { - return physicalSlotNumber; - } - - @Override - public TaskManagerGateway getTaskManagerGateway() { - return taskManagerGateway; - } + interface Payload { + + /** + * Releases the payload. If the payload could be released, then it returns true, + * otherwise false. + * + * @param cause of the payload release + * @return true if the payload could be released, otherwise false + */ + boolean release(Throwable cause); } } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotActions.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotActions.java new file mode 100644 index 0000000000000..045678e35b57e --- /dev/null +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotActions.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.runtime.jobmaster.SlotRequestId; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.messages.Acknowledge; + +import javax.annotation.Nullable; + +import java.util.concurrent.CompletableFuture; + +/** + * Interface for components which have to perform actions on allocated slots. + */ +public interface AllocatedSlotActions { + + /** + * Releases the slot with the given {@link SlotRequestId}. If the slot belonged to a + * slot sharing group, then the corresponding {@link SlotSharingGroupId} has to be + * provided. Additionally, one can provide a cause for the slot release. 
+ * + * @param slotRequestId identifying the slot to release + * @param slotSharingGroupId identifying the slot sharing group to which the slot belongs, null if none + * @param cause of the slot release, null if none + * @return Acknowledge (future) after the slot has been released + */ + CompletableFuture releaseSlot( + SlotRequestId slotRequestId, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable Throwable cause); +} diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/DualKeyMap.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/DualKeyMap.java similarity index 89% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/DualKeyMap.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/DualKeyMap.java index 741d137206918..04b3ca644c69e 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/DualKeyMap.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/DualKeyMap.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.api.java.tuple.Tuple2; @@ -130,6 +130,13 @@ public void clear() { bMap.clear(); } + // ----------------------------------------------------------------------- + // Inner classes + // ----------------------------------------------------------------------- + + /** + * Collection which contains the values of the dual key map. + */ private final class Values extends AbstractCollection { @Override @@ -143,6 +150,9 @@ public int size() { } } + /** + * Iterator which iterates over the values of the dual key map. 
+ */ private final class ValueIterator implements Iterator { private final Iterator> iterator = aMap.values().iterator(); diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SingleLogicalSlot.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SingleLogicalSlot.java new file mode 100644 index 0000000000000..9bd559bc166ef --- /dev/null +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SingleLogicalSlot.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; +import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; +import org.apache.flink.util.Preconditions; + +import javax.annotation.Nullable; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Function; + +/** + * Implementation of the {@link LogicalSlot} which is used by the {@link SlotPool}. + */ +public class SingleLogicalSlot implements LogicalSlot, AllocatedSlot.Payload { + + private static final AtomicReferenceFieldUpdater PAYLOAD_UPDATER = AtomicReferenceFieldUpdater.newUpdater( + SingleLogicalSlot.class, + Payload.class, + "payload"); + + private final SlotRequestId slotRequestId; + + private final SlotContext slotContext; + + // null if the logical slot does not belong to a slot sharing group, otherwise non-null + @Nullable + private final SlotSharingGroupId slotSharingGroupId; + + // locality of this slot wrt the requested preferred locations + private final Locality locality; + + // owner of this slot to which it is returned upon release + private final SlotOwner slotOwner; + + // LogicalSlot.Payload of this slot + private volatile Payload payload; + + public SingleLogicalSlot( + SlotRequestId slotRequestId, + SlotContext slotContext, + @Nullable SlotSharingGroupId slotSharingGroupId, + Locality locality, + SlotOwner slotOwner) { + this.slotRequestId = Preconditions.checkNotNull(slotRequestId); + this.slotContext = 
Preconditions.checkNotNull(slotContext); + this.slotSharingGroupId = slotSharingGroupId; + this.locality = Preconditions.checkNotNull(locality); + this.slotOwner = Preconditions.checkNotNull(slotOwner); + + payload = null; + } + + @Override + public TaskManagerLocation getTaskManagerLocation() { + return slotContext.getTaskManagerLocation(); + } + + @Override + public TaskManagerGateway getTaskManagerGateway() { + return slotContext.getTaskManagerGateway(); + } + + @Override + public Locality getLocality() { + return locality; + } + + @Override + public boolean isAlive() { + final Payload currentPayload = payload; + + if (currentPayload != null) { + return !currentPayload.getTerminalStateFuture().isDone(); + } else { + // We are always alive if there is no payload assigned yet. + // If this slot is released and no payload is assigned, then the TERMINATED_PAYLOAD is assigned + return true; + } + } + + @Override + public boolean tryAssignPayload(Payload payload) { + Preconditions.checkNotNull(payload); + return PAYLOAD_UPDATER.compareAndSet(this, null, payload); + } + + @Nullable + @Override + public Payload getPayload() { + return payload; + } + + @Override + public CompletableFuture releaseSlot(@Nullable Throwable cause) { + // set an already terminated payload if the payload of this slot is still empty + tryAssignPayload(TERMINATED_PAYLOAD); + + // notify the payload that the slot will be released + payload.fail(cause); + + // Wait until the payload has been terminated. 
Only then, we return the slot to its rightful owner + return payload.getTerminalStateFuture() + .handle((Object ignored, Throwable throwable) -> slotOwner.returnAllocatedSlot(this)) + .thenApply(Function.identity()); + } + + @Override + public int getPhysicalSlotNumber() { + return slotContext.getPhysicalSlotNumber(); + } + + @Override + public AllocationID getAllocationId() { + return slotContext.getAllocationId(); + } + + @Override + public SlotRequestId getSlotRequestId() { + return slotRequestId; + } + + @Nullable + @Override + public SlotSharingGroupId getSlotSharingGroupId() { + return slotSharingGroupId; + } + + // ------------------------------------------------------------------------- + // AllocatedSlot.Payload implementation + // ------------------------------------------------------------------------- + + /** + * A release of the payload by the {@link AllocatedSlot} triggers a release of the payload of + * the logical slot. + * + * @param cause of the payload release + * @return true if the logical slot's payload could be released, otherwise false + */ + @Override + public boolean release(Throwable cause) { + return releaseSlot(cause).isDone(); + } +} diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPool.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPool.java similarity index 63% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPool.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPool.java index 68f5be66e3100..996e4455e2758 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPool.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPool.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.common.JobID; @@ -25,14 +25,19 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; import org.apache.flink.runtime.concurrent.FutureUtils; +import org.apache.flink.runtime.executiongraph.ExecutionGraph; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint; import org.apache.flink.runtime.jobmanager.scheduler.Locality; import org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.jobmanager.slots.SlotAndLocality; -import org.apache.flink.runtime.jobmanager.slots.SlotException; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.jobmaster.JobMasterId; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.messages.Acknowledge; import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway; import org.apache.flink.runtime.resourcemanager.SlotRequest; @@ -42,6 +47,8 @@ import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.runtime.util.clock.Clock; import org.apache.flink.runtime.util.clock.SystemClock; +import org.apache.flink.util.AbstractID; +import org.apache.flink.util.FlinkException; import org.apache.flink.util.Preconditions; import org.slf4j.Logger; @@ -55,9 +62,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import 
java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.TimeoutException; @@ -67,22 +74,22 @@ import static org.apache.flink.util.Preconditions.checkNotNull; /** - * The slot pool serves slot request issued by Scheduler or ExecutionGraph. It will will attempt to acquire new slots + * The slot pool serves slot requests issued by {@link ExecutionGraph}. It will attempt to acquire new slots * from the ResourceManager when it cannot serve a slot request. If no ResourceManager is currently available, * or it gets a decline from the ResourceManager, or a request times out, it fails the slot request. The slot pool also * holds all the slots that were offered to it and accepted, and can thus provide registered free slots even if the * ResourceManager is down. The slots will only be released when they are useless, e.g. when the job is fully running * but we still have some free slots. - *

- * All the allocation or the slot offering will be identified by self generated AllocationID, we will use it to + * + *

All the allocation or the slot offering will be identified by self generated AllocationID, we will use it to * eliminate ambiguities. - * + * * TODO : Make pending requests location preference aware * TODO : Make pass location preferences to ResourceManager when sending a slot request */ -public class SlotPool extends RpcEndpoint implements SlotPoolGateway { +public class SlotPool extends RpcEndpoint implements SlotPoolGateway, AllocatedSlotActions { - /** The log for the pool - shared also with the internal classes */ + /** The log for the pool - shared also with the internal classes. */ static final Logger LOG = LoggerFactory.getLogger(SlotPool.class); // ------------------------------------------------------------------------ @@ -99,33 +106,36 @@ public class SlotPool extends RpcEndpoint implements SlotPoolGateway { private final ProviderAndOwner providerAndOwner; - /** All registered TaskManagers, slots will be accepted and used only if the resource is registered */ + /** All registered TaskManagers, slots will be accepted and used only if the resource is registered. */ private final HashSet registeredTaskManagers; - /** The book-keeping of all allocated slots */ + /** The book-keeping of all allocated slots. */ private final AllocatedSlots allocatedSlots; - /** The book-keeping of all available slots */ + /** The book-keeping of all available slots. */ private final AvailableSlots availableSlots; - /** All pending requests waiting for slots */ - private final DualKeyMap pendingRequests; + /** All pending requests waiting for slots. */ + private final DualKeyMap pendingRequests; - /** The requests that are waiting for the resource manager to be connected */ - private final HashMap waitingForResourceManager; + /** The requests that are waiting for the resource manager to be connected. */ + private final HashMap waitingForResourceManager; - /** Timeout for request calls to the ResourceManager */ + /** Timeout for request calls to the ResourceManager. 
*/ private final Time resourceManagerRequestsTimeout; - /** Timeout for allocation round trips (RM -> launch TM -> offer slot) */ + /** Timeout for allocation round trips (RM -> launch TM -> offer slot). */ private final Time resourceManagerAllocationTimeout; private final Clock clock; - /** the fencing token of the job manager */ + /** Managers for the different slot sharing groups. */ + protected final Map slotSharingManagers; + + /** the fencing token of the job manager. */ private JobMasterId jobMasterId; - /** The gateway to communicate with resource manager */ + /** The gateway to communicate with resource manager. */ private ResourceManagerGateway resourceManagerGateway; private String jobManagerAddress; @@ -160,6 +170,8 @@ public SlotPool( this.providerAndOwner = new ProviderAndOwner(getSelfGateway(SlotPoolGateway.class), slotRequestTimeout); + this.slotSharingManagers = new HashMap<>(4); + this.jobMasterId = null; this.resourceManagerGateway = null; this.jobManagerAddress = null; @@ -219,9 +231,9 @@ public void suspend() { /** * Gets the slot owner implementation for this pool. - * + * *

This method does not mutate state and can be called directly (no RPC indirection) - * + * * @return The slot owner implementation for this pool. */ public SlotOwner getSlotOwner() { @@ -267,115 +279,349 @@ public void disconnectResourceManager() { @Override public CompletableFuture allocateSlot( - SlotRequestID requestId, - ScheduledUnit task, - ResourceProfile resources, - Iterable locationPreferences, + SlotRequestId slotRequestId, + ScheduledUnit scheduledUnit, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling, Time timeout) { - return internalAllocateSlot(requestId, task, resources, locationPreferences); + return internalAllocateSlot( + slotRequestId, + scheduledUnit, + resourceProfile, + locationPreferences, + allowQueuedScheduling); } - @Override - public void returnAllocatedSlot(SlotRequestID slotRequestId) { - final AllocatedSlot allocatedSlot = allocatedSlots.remove(slotRequestId); + private CompletableFuture internalAllocateSlot( + SlotRequestId slotRequestId, + ScheduledUnit task, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling) { - if (allocatedSlot != null) { - if (allocatedSlot.releaseLogicalSlot()) { - tryFulfillSlotRequestOrMakeAvailable(allocatedSlot); - } else { - throw new RuntimeException("Could not release allocated slot " + allocatedSlot + '.'); + final SlotSharingGroupId slotSharingGroupId = task.getSlotSharingGroupId(); + + if (slotSharingGroupId != null) { + // allocate slot with slot sharing + final SlotSharingManager multiTaskSlotManager = slotSharingManagers.computeIfAbsent( + slotSharingGroupId, + id -> new SlotSharingManager( + id, + this, + providerAndOwner)); + + final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality; + + try { + if (task.getCoLocationConstraint() != null) { + multiTaskSlotLocality = allocateCoLocatedMultiTaskSlot( + task.getCoLocationConstraint(), + multiTaskSlotManager, + resourceProfile, + 
locationPreferences, + allowQueuedScheduling); + } else { + multiTaskSlotLocality = allocateMultiTaskSlot( + task.getJobVertexId(), multiTaskSlotManager, + resourceProfile, + locationPreferences, + allowQueuedScheduling); + } + } catch (NoResourceAvailableException noResourceException) { + return FutureUtils.completedExceptionally(noResourceException); } + + // sanity check + Preconditions.checkState(!multiTaskSlotLocality.getMultiTaskSlot().contains(task.getJobVertexId())); + + final SlotSharingManager.SingleTaskSlot leaf = multiTaskSlotLocality.getMultiTaskSlot().allocateSingleTaskSlot( + slotRequestId, + task.getJobVertexId(), + multiTaskSlotLocality.getLocality()); + + return leaf.getLogicalSlotFuture(); } else { - log.debug("There is no allocated slot with request id {}. Ignoring this request.", slotRequestId); + // request an allocated slot to assign a single logical slot to + CompletableFuture slotAndLocalityFuture = requestAllocatedSlot( + slotRequestId, + resourceProfile, + locationPreferences, + allowQueuedScheduling); + + return slotAndLocalityFuture.thenApply( + (SlotAndLocality slotAndLocality) -> { + final AllocatedSlot allocatedSlot = slotAndLocality.getSlot(); + + final SingleLogicalSlot singleTaskSlot = new SingleLogicalSlot( + slotRequestId, + allocatedSlot, + null, + slotAndLocality.getLocality(), + providerAndOwner); + + if (allocatedSlot.tryAssignPayload(singleTaskSlot)) { + return singleTaskSlot; + } else { + final FlinkException flinkException = new FlinkException("Could not assign payload to allocated slot " + allocatedSlot.getAllocationId() + '.'); + releaseSlot(slotRequestId, null, flinkException); + throw new CompletionException(flinkException); + } + }); } } - @Override - public CompletableFuture cancelSlotRequest(SlotRequestID slotRequestId) { - final PendingRequest pendingRequest = removePendingRequest(slotRequestId); + /** + * Allocates a co-located {@link SlotSharingManager.MultiTaskSlot} for the given {@link CoLocationConstraint}. 
+ * + *

If allowQueuedScheduling is true, then the returned {@link SlotSharingManager.MultiTaskSlot} can be + * uncompleted. + * + * @param coLocationConstraint for which to allocate a {@link SlotSharingManager.MultiTaskSlot} + * @param multiTaskSlotManager responsible for the slot sharing group for which to allocate the slot + * @param resourceProfile specifying the requirements for the requested slot + * @param locationPreferences containing preferred TaskExecutors on which to allocate the slot + * @param allowQueuedScheduling true if queued scheduling (the returned task slot must not be completed yet) is allowed, otherwise false + * @return A {@link SlotSharingManager.MultiTaskSlotLocality} which contains the allocated{@link SlotSharingManager.MultiTaskSlot} + * and its locality wrt the given location preferences + * @throws NoResourceAvailableException if no task slot could be allocated + */ + private SlotSharingManager.MultiTaskSlotLocality allocateCoLocatedMultiTaskSlot( + CoLocationConstraint coLocationConstraint, + SlotSharingManager multiTaskSlotManager, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling) throws NoResourceAvailableException { + final SlotRequestId coLocationSlotRequestId = coLocationConstraint.getSlotRequestId(); + + if (coLocationSlotRequestId != null) { + // we have a slot assigned --> try to retrieve it + final SlotSharingManager.TaskSlot taskSlot = multiTaskSlotManager.getTaskSlot(coLocationSlotRequestId); + + if (taskSlot != null) { + Preconditions.checkState(taskSlot instanceof SlotSharingManager.MultiTaskSlot); + return SlotSharingManager.MultiTaskSlotLocality.of(((SlotSharingManager.MultiTaskSlot) taskSlot), Locality.LOCAL); + } else { + // the slot may have been cancelled in the mean time + coLocationConstraint.setSlotRequestId(null); + } + } - if (pendingRequest != null) { - failPendingRequest(pendingRequest, new CancellationException("Allocation with request id" + slotRequestId + " 
cancelled.")); + final Collection actualLocationPreferences; + + if (coLocationConstraint.isAssigned()) { + actualLocationPreferences = Collections.singleton(coLocationConstraint.getLocation()); } else { - final AllocatedSlot allocatedSlot = allocatedSlots.get(slotRequestId); + actualLocationPreferences = locationPreferences; + } - if (allocatedSlot != null) { - LOG.info("Returning allocated slot {} because the corresponding allocation request {} was cancelled.", allocatedSlot, slotRequestId); - // TODO: Avoid having to send another message to do the slot releasing (e.g. introduce Slot#cancelExecution) and directly return slot - allocatedSlot.triggerLogicalSlotRelease(); - } else { - LOG.debug("There was no slot allocation with {} to be cancelled.", slotRequestId); - } + // get a new multi task slot + final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = allocateMultiTaskSlot( + coLocationConstraint.getGroupId(), multiTaskSlotManager, + resourceProfile, + actualLocationPreferences, + allowQueuedScheduling); + + // check whether we fulfill the co-location constraint + if (coLocationConstraint.isAssigned() && multiTaskSlotLocality.getLocality() != Locality.LOCAL) { + multiTaskSlotLocality.getMultiTaskSlot().release( + new FlinkException("Multi task slot is not local and, thus, does not fulfill the co-location constraint.")); + + throw new NoResourceAvailableException("Could not allocate a local multi task slot for the " + + "co location constraint " + coLocationConstraint + '.'); } - return CompletableFuture.completedFuture(Acknowledge.get()); + final SlotRequestId slotRequestId = new SlotRequestId(); + final SlotSharingManager.MultiTaskSlot coLocationSlot = multiTaskSlotLocality.getMultiTaskSlot().allocateMultiTaskSlot( + slotRequestId, + coLocationConstraint.getGroupId()); + + // mark the requested slot as co-located slot for other co-located tasks + coLocationConstraint.setSlotRequestId(slotRequestId); + + // lock the co-location constraint once 
we have obtained the allocated slot + coLocationSlot.getSlotContextFuture().whenComplete( + (SlotContext slotContext, Throwable throwable) -> { + if (throwable == null) { + // check whether we are still assigned to the co-location constraint + if (Objects.equals(coLocationConstraint.getSlotRequestId(), slotRequestId)) { + coLocationConstraint.lockLocation(slotContext.getTaskManagerLocation()); + } else { + log.debug("Failed to lock colocation constraint {} because assigned slot " + + "request {} differs from fulfilled slot request {}.", + coLocationConstraint.getGroupId(), + coLocationConstraint.getSlotRequestId(), + slotRequestId); + } + } else { + log.debug("Failed to lock colocation constraint {} because the slot " + + "allocation for slot request {} failed.", + coLocationConstraint.getGroupId(), + coLocationConstraint.getSlotRequestId(), + throwable); + } + }); + + return SlotSharingManager.MultiTaskSlotLocality.of(coLocationSlot, multiTaskSlotLocality.getLocality()); } - CompletableFuture internalAllocateSlot( - SlotRequestID requestId, - ScheduledUnit task, - ResourceProfile resources, - Iterable locationPreferences) { + /** + * Allocates a {@link SlotSharingManager.MultiTaskSlot} for the given groupId which is in the + * slot sharing group for which the given {@link SlotSharingManager} is responsible. + * + *

If allowQueuedScheduling is true, then the method can return an uncompleted {@link SlotSharingManager.MultiTaskSlot}. + * + * @param groupId for which to allocate a new {@link SlotSharingManager.MultiTaskSlot} + * @param slotSharingManager responsible for the slot sharing group for which to allocate the slot + * @param resourceProfile specifying the requirements for the requested slot + * @param locationPreferences containing preferred TaskExecutors on which to allocate the slot + * @param allowQueuedScheduling true if queued scheduling (the returned task slot must not be completed yet) is allowed, otherwise false + * @return A {@link SlotSharingManager.MultiTaskSlotLocality} which contains the allocated {@link SlotSharingManager.MultiTaskSlot} + * and its locality wrt the given location preferences + * @throws NoResourceAvailableException if no task slot could be allocated + */ + private SlotSharingManager.MultiTaskSlotLocality allocateMultiTaskSlot( + AbstractID groupId, + SlotSharingManager slotSharingManager, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling) throws NoResourceAvailableException { + + // check first whether we have a resolved root slot which we can use + SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = slotSharingManager.getResolvedRootSlot( + groupId, + locationPreferences); + + if (multiTaskSlotLocality != null && multiTaskSlotLocality.getLocality() == Locality.LOCAL) { + return multiTaskSlotLocality; + } - // (1) do we have a slot available already? 
- SlotAndLocality slotFromPool = availableSlots.poll(resources, locationPreferences); - if (slotFromPool != null) { - final AllocatedSlot allocatedSlot = slotFromPool.slot(); + final SlotRequestId allocatedSlotRequestId = new SlotRequestId(); + final SlotRequestId multiTaskSlotRequestId = new SlotRequestId(); - final SimpleSlot simpleSlot; - try { - simpleSlot = allocatedSlot.allocateSimpleSlot(requestId, slotFromPool.locality()); - } catch (SlotException e) { - availableSlots.add(allocatedSlot, clock.relativeTimeMillis()); + // check whether we have an allocated slot available which we can use to create a new multi task slot in + final SlotAndLocality polledSlotAndLocality = pollAndAllocateSlot(allocatedSlotRequestId, resourceProfile, locationPreferences); + + if (polledSlotAndLocality != null && (polledSlotAndLocality.getLocality() == Locality.LOCAL || multiTaskSlotLocality == null)) { - return FutureUtils.completedExceptionally(e); + final AllocatedSlot allocatedSlot = polledSlotAndLocality.getSlot(); + final SlotSharingManager.MultiTaskSlot multiTaskSlot = slotSharingManager.createRootSlot( + multiTaskSlotRequestId, + CompletableFuture.completedFuture(polledSlotAndLocality.getSlot()), + allocatedSlotRequestId); + + if (allocatedSlot.tryAssignPayload(multiTaskSlot)) { + return SlotSharingManager.MultiTaskSlotLocality.of(multiTaskSlot, polledSlotAndLocality.getLocality()); + } else { + multiTaskSlot.release(new FlinkException("Could not assign payload to allocated slot " + + allocatedSlot.getAllocationId() + '.')); } + } - allocatedSlots.add(requestId, allocatedSlot); - return CompletableFuture.completedFuture(simpleSlot); + if (multiTaskSlotLocality != null) { + // prefer slot sharing group slots over unused slots + if (polledSlotAndLocality != null) { + releaseSlot( + allocatedSlotRequestId, + null, + new FlinkException("Locality constraint is not better fulfilled by allocated slot.")); + } + return multiTaskSlotLocality; } - // we have to request a new 
allocated slot - CompletableFuture allocatedSlotFuture = requestSlot( - requestId, - resources); + if (allowQueuedScheduling) { + // there is no slot immediately available --> check first for uncompleted slots at the slot sharing group + SlotSharingManager.MultiTaskSlot multiTaskSlotFuture = slotSharingManager.getUnresolvedRootSlot(groupId); - return allocatedSlotFuture.thenApply( - (AllocatedSlot allocatedSlot) -> { - try { - return allocatedSlot.allocateSimpleSlot(requestId, Locality.UNKNOWN); - } catch (SlotException e) { - throw new CompletionException("Could not allocate a logical simple slot from allocate slot " + - allocatedSlot + '.', e); - } - }); + if (multiTaskSlotFuture == null) { + // it seems as if we have to request a new slot from the resource manager, this is always the last resort!!! + final CompletableFuture futureSlot = requestNewAllocatedSlot(allocatedSlotRequestId, resourceProfile); + + multiTaskSlotFuture = slotSharingManager.createRootSlot( + multiTaskSlotRequestId, + futureSlot, + allocatedSlotRequestId); + + futureSlot.whenComplete( + (AllocatedSlot allocatedSlot, Throwable throwable) -> { + final SlotSharingManager.TaskSlot taskSlot = slotSharingManager.getTaskSlot(multiTaskSlotRequestId); + + if (taskSlot != null) { + // still valid + if (!(taskSlot instanceof SlotSharingManager.MultiTaskSlot) || throwable != null) { + taskSlot.release(throwable); + } else { + if (!allocatedSlot.tryAssignPayload(((SlotSharingManager.MultiTaskSlot) taskSlot))) { + taskSlot.release(new FlinkException("Could not assign payload to allocated slot " + + allocatedSlot.getAllocationId() + '.')); + } + } + } else { + releaseSlot( + allocatedSlotRequestId, + null, + new FlinkException("Could not find task slot with " + multiTaskSlotRequestId + '.')); + } + }); + } + + return SlotSharingManager.MultiTaskSlotLocality.of(multiTaskSlotFuture, Locality.UNKNOWN); + + } else { + throw new NoResourceAvailableException("Could not allocate a shared slot for " + groupId + 
'.'); + } } /** - * Checks whether there exists a pending request with the given allocation id and removes it - * from the internal data structures. + * Allocates an allocated slot first by polling from the available slots and then requesting a new + * slot from the ResourceManager if no fitting slot could be found. * - * @param requestId identifying the pending request - * @return pending request if there is one, otherwise null + * @param slotRequestId identifying the slot allocation request + * @param resourceProfile which the allocated slot should fulfill + * @param locationPreferences for the allocated slot + * @param allowQueuedScheduling true if the slot allocation can be completed in the future + * @return Future containing the allocated simple slot */ - @Nullable - private PendingRequest removePendingRequest(SlotRequestID requestId) { - PendingRequest result = waitingForResourceManager.remove(requestId); + private CompletableFuture requestAllocatedSlot( + SlotRequestId slotRequestId, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling) { - if (result != null) { - // sanity check - assert !pendingRequests.containsKeyA(requestId) : "A pending requests should only be part of either " + - "the pendingRequests or waitingForResourceManager but not both."; + final CompletableFuture allocatedSlotLocalityFuture; - return result; + // (1) do we have a slot available already? 
+ + SlotAndLocality slotFromPool = pollAndAllocateSlot(slotRequestId, resourceProfile, locationPreferences); + + if (slotFromPool != null) { + allocatedSlotLocalityFuture = CompletableFuture.completedFuture(slotFromPool); + } else if (allowQueuedScheduling) { + // we have to request a new allocated slot + CompletableFuture allocatedSlotFuture = requestNewAllocatedSlot( + slotRequestId, + resourceProfile); + + allocatedSlotLocalityFuture = allocatedSlotFuture.thenApply((AllocatedSlot allocatedSlot) -> new SlotAndLocality(allocatedSlot, Locality.UNKNOWN)); } else { - return pendingRequests.removeKeyA(requestId); + allocatedSlotLocalityFuture = FutureUtils.completedExceptionally(new NoResourceAvailableException("Could not allocate a simple slot for " + + slotRequestId + '.')); } + + return allocatedSlotLocalityFuture; } - private CompletableFuture requestSlot( - SlotRequestID slotRequestId, + /** + * Requests a new slot with the given {@link ResourceProfile} from the ResourceManager. If there is + * currently no ResourceManager connected, then the request is stashed and sent once a new + * ResourceManager is connected. 
+ * + * @param slotRequestId identifying the requested slot + * @param resourceProfile which the requested slot should fulfill + * @return An {@link AllocatedSlot} future which is completed once the slot is offered to the {@link SlotPool} + */ + private CompletableFuture requestNewAllocatedSlot( + SlotRequestId slotRequestId, ResourceProfile resourceProfile) { final PendingRequest pendingRequest = new PendingRequest( @@ -432,7 +678,7 @@ private void requestSlotFromResourceManager( getMainThreadExecutor()); } - private void slotRequestToResourceManagerSuccess(final SlotRequestID requestId) { + private void slotRequestToResourceManagerSuccess(final SlotRequestId requestId) { // a request is pending from the ResourceManager to a (future) TaskManager // we only add the watcher here in case that request times out scheduleRunAsync(new Runnable() { @@ -443,7 +689,7 @@ public void run() { }, resourceManagerAllocationTimeout); } - private void slotRequestToResourceManagerFailed(SlotRequestID slotRequestID, Throwable failure) { + private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) { PendingRequest request = pendingRequests.removeKeyA(slotRequestID); if (request != null) { request.getAllocatedSlotFuture().completeExceptionally(new NoResourceAvailableException( @@ -455,23 +701,13 @@ private void slotRequestToResourceManagerFailed(SlotRequestID slotRequestID, Thr } } - private void checkTimeoutSlotAllocation(SlotRequestID slotRequestID) { + private void checkTimeoutSlotAllocation(SlotRequestId slotRequestID) { PendingRequest request = pendingRequests.removeKeyA(slotRequestID); if (request != null) { failPendingRequest(request, new TimeoutException("Slot allocation request " + slotRequestID + " timed out")); } } - private void failPendingRequest(PendingRequest pendingRequest, Exception e) { - Preconditions.checkNotNull(pendingRequest); - Preconditions.checkNotNull(e); - - if (!pendingRequest.getAllocatedSlotFuture().isDone()) { - 
LOG.info("Failing pending request {}.", pendingRequest.getSlotRequestId()); - pendingRequest.getAllocatedSlotFuture().completeExceptionally(e); - } - } - private void stashRequestWaitingForResourceManager(final PendingRequest pendingRequest) { LOG.info("Cannot serve slot request, no ResourceManager connected. " + @@ -487,7 +723,7 @@ public void run() { }, resourceManagerRequestsTimeout); } - private void checkTimeoutRequestWaitingForResourceManager(SlotRequestID slotRequestId) { + private void checkTimeoutRequestWaitingForResourceManager(SlotRequestId slotRequestId) { PendingRequest request = waitingForResourceManager.remove(slotRequestId); if (request != null) { failPendingRequest( @@ -500,6 +736,91 @@ private void checkTimeoutRequestWaitingForResourceManager(SlotRequestID slotRequ // Slot releasing & offering // ------------------------------------------------------------------------ + @Override + public CompletableFuture releaseSlot(SlotRequestId slotRequestId, @Nullable SlotSharingGroupId slotSharingGroupId, Throwable cause) { + + if (slotSharingGroupId != null) { + final SlotSharingManager multiTaskSlotManager = slotSharingManagers.get(slotSharingGroupId); + + if (multiTaskSlotManager != null) { + final SlotSharingManager.TaskSlot taskSlot = multiTaskSlotManager.getTaskSlot(slotRequestId); + + if (taskSlot != null) { + taskSlot.release(cause); + } else { + log.debug("Could not find slot {} in slot sharing group {}. Ignoring release slot request.", slotRequestId, slotSharingGroupId, cause); + } + } else { + log.debug("Could not find slot sharing group {}. 
Ignoring release slot request.", slotSharingGroupId, cause); + } + } else { + final PendingRequest pendingRequest = removePendingRequest(slotRequestId); + + if (pendingRequest != null) { + failPendingRequest(pendingRequest, new FlinkException("Pending slot request with " + slotRequestId + " has been released.")); + } else { + final AllocatedSlot allocatedSlot = allocatedSlots.remove(slotRequestId); + + if (allocatedSlot != null) { + // sanity check + if (allocatedSlot.releasePayload(cause)) { + tryFulfillSlotRequestOrMakeAvailable(allocatedSlot); + } + } else { + log.debug("There is no allocated slot with allocation id {}. Ignoring the release slot request.", slotRequestId, cause); + } + } + } + + return CompletableFuture.completedFuture(Acknowledge.get()); + } + + /** + * Checks whether there exists a pending request with the given allocation id and removes it + * from the internal data structures. + * + * @param requestId identifying the pending request + * @return pending request if there is one, otherwise null + */ + @Nullable + private PendingRequest removePendingRequest(SlotRequestId requestId) { + PendingRequest result = waitingForResourceManager.remove(requestId); + + if (result != null) { + // sanity check + assert !pendingRequests.containsKeyA(requestId) : "A pending requests should only be part of either " + + "the pendingRequests or waitingForResourceManager but not both."; + + return result; + } else { + return pendingRequests.removeKeyA(requestId); + } + } + + private void failPendingRequest(PendingRequest pendingRequest, Exception e) { + Preconditions.checkNotNull(pendingRequest); + Preconditions.checkNotNull(e); + + if (!pendingRequest.getAllocatedSlotFuture().isDone()) { + LOG.info("Failing pending request {}.", pendingRequest.getSlotRequestId()); + pendingRequest.getAllocatedSlotFuture().completeExceptionally(e); + } + } + + @Nullable + private SlotAndLocality pollAndAllocateSlot( + SlotRequestId slotRequestId, + ResourceProfile resourceProfile, + 
Collection locationPreferences) { + SlotAndLocality slotFromPool = availableSlots.poll(resourceProfile, locationPreferences); + + if (slotFromPool != null) { + allocatedSlots.add(slotRequestId, slotFromPool.getSlot()); + } + + return slotFromPool; + } + /** * Tries to fulfill with the given allocated slot a pending slot request or add the * allocated slot to the set of available slots if no matching request is available. @@ -587,9 +908,9 @@ public CompletableFuture> offerSlots( return resultingSlotOffers; } - + /** - * Slot offering by TaskManager with AllocationID. The AllocationID is originally generated by this pool and + * Slot offering by TaskExecutor with AllocationID. The AllocationID is originally generated by this pool and * transfer through the ResourceManager to TaskManager. We use it to distinguish the different allocation * we issued. Slot offering may be rejected if we find something mismatching or there is actually no pending * request waiting for this slot (maybe fulfilled by some other returned slot). @@ -630,15 +951,19 @@ public CompletableFuture offerSlot( taskManagerLocation, slotOffer.getSlotIndex(), slotOffer.getResourceProfile(), - taskManagerGateway, - providerAndOwner); + taskManagerGateway); // check whether we have request waiting for this slot PendingRequest pendingRequest = pendingRequests.removeKeyB(allocationID); if (pendingRequest != null) { // we were waiting for this! 
allocatedSlots.add(pendingRequest.getSlotRequestId(), allocatedSlot); - pendingRequest.getAllocatedSlotFuture().complete(allocatedSlot); + + if (!pendingRequest.getAllocatedSlotFuture().complete(allocatedSlot)) { + // we could not complete the pending slot future --> try to fulfill another pending request + allocatedSlots.remove(pendingRequest.getSlotRequestId()); + tryFulfillSlotRequestOrMakeAvailable(allocatedSlot); + } } else { // we were actually not waiting for this: @@ -652,7 +977,7 @@ public CompletableFuture offerSlot( return CompletableFuture.completedFuture(true); } - + // TODO - periodic (every minute or so) catch slots that were lost (check all slots, if they have any task active) // TODO - release slots that were not used to the resource manager @@ -685,7 +1010,7 @@ else if (availableSlots.tryRemove(allocationID)) { if (allocatedSlot != null) { // release the slot. // since it is not in 'allocatedSlots' any more, it will be dropped o return' - allocatedSlot.triggerLogicalSlotRelease(); + allocatedSlot.releasePayload(cause); } else { LOG.debug("Outdated request to fail slot [{}] with ", allocationID, cause); @@ -724,10 +1049,9 @@ public CompletableFuture releaseTaskManager(final ResourceID resour availableSlots.removeAllForTaskManager(resourceID); final Set allocatedSlotsForResource = allocatedSlots.removeSlotsForTaskManager(resourceID); + for (AllocatedSlot allocatedSlot : allocatedSlotsForResource) { - allocatedSlot.triggerLogicalSlotRelease(); - // TODO: This is a work-around to mark the logical slot as released. 
We should split up the internalReturnSlot method to not poll pending requests - allocatedSlot.releaseLogicalSlot(); + allocatedSlot.releasePayload(new FlinkException("TaskManager " + resourceID + " was released.")); } } @@ -739,22 +1063,22 @@ public CompletableFuture releaseTaskManager(final ResourceID resour // ------------------------------------------------------------------------ @VisibleForTesting - AllocatedSlots getAllocatedSlots() { + protected AllocatedSlots getAllocatedSlots() { return allocatedSlots; } @VisibleForTesting - AvailableSlots getAvailableSlots() { + protected AvailableSlots getAvailableSlots() { return availableSlots; } @VisibleForTesting - DualKeyMap getPendingRequests() { + DualKeyMap getPendingRequests() { return pendingRequests; } @VisibleForTesting - Map getWaitingForResourceManager() { + Map getWaitingForResourceManager() { return waitingForResourceManager; } @@ -767,11 +1091,11 @@ Map getWaitingForResourceManager() { */ static class AllocatedSlots { - /** All allocated slots organized by TaskManager's id */ + /** All allocated slots organized by TaskManager's id. */ private final Map> allocatedSlotsByTaskManager; - /** All allocated slots organized by AllocationID */ - private final DualKeyMap allocatedSlotsById; + /** All allocated slots organized by AllocationID. 
*/ + private final DualKeyMap allocatedSlotsById; AllocatedSlots() { this.allocatedSlotsByTaskManager = new HashMap<>(16); @@ -783,7 +1107,7 @@ static class AllocatedSlots { * * @param allocatedSlot The allocated slot */ - void add(SlotRequestID slotRequestId, AllocatedSlot allocatedSlot) { + void add(SlotRequestId slotRequestId, AllocatedSlot allocatedSlot) { allocatedSlotsById.put(allocatedSlot.getAllocationId(), slotRequestId, allocatedSlot); final ResourceID resourceID = allocatedSlot.getTaskManagerLocation().getResourceID(); @@ -796,7 +1120,7 @@ void add(SlotRequestID slotRequestId, AllocatedSlot allocatedSlot) { } /** - * Get allocated slot with allocation id + * Get allocated slot with allocation id. * * @param allocationID The allocation id * @return The allocated slot, null if we can't find a match @@ -805,12 +1129,12 @@ AllocatedSlot get(final AllocationID allocationID) { return allocatedSlotsById.getKeyA(allocationID); } - AllocatedSlot get(final SlotRequestID slotRequestId) { + AllocatedSlot get(final SlotRequestId slotRequestId) { return allocatedSlotsById.getKeyB(slotRequestId); } /** - * Check whether we have allocated this slot + * Check whether we have allocated this slot. * * @param slotAllocationId The allocation id of the slot to check * @return True if we contains this slot @@ -843,7 +1167,7 @@ AllocatedSlot remove(final AllocationID allocationID) { * @return The removed allocated slot or null. */ @Nullable - AllocatedSlot remove(final SlotRequestID slotRequestId) { + AllocatedSlot remove(final SlotRequestId slotRequestId) { final AllocatedSlot allocatedSlot = allocatedSlotsById.removeKeyB(slotRequestId); if (allocatedSlot != null) { @@ -914,15 +1238,15 @@ Set getSlotsForTaskManager(ResourceID resourceId) { /** * Organize all available slots from different points of view. 
*/ - static class AvailableSlots { + protected static class AvailableSlots { - /** All available slots organized by TaskManager */ + /** All available slots organized by TaskManager. */ private final HashMap> availableSlotsByTaskManager; - /** All available slots organized by host */ + /** All available slots organized by host. */ private final HashMap> availableSlotsByHost; - /** The available slots, with the time when they were inserted */ + /** The available slots, with the time when they were inserted. */ private final HashMap availableSlots; AvailableSlots() { @@ -978,10 +1302,10 @@ boolean contains(AllocationID slotId) { * * @param resourceProfile The required resource profile. * @param locationPreferences The location preferences, in order to be checked. - * + * * @return Slot which matches the resource profile, null if we can't find a match */ - SlotAndLocality poll(ResourceProfile resourceProfile, Iterable locationPreferences) { + SlotAndLocality poll(ResourceProfile resourceProfile, Collection locationPreferences) { // fast path if no slots are available if (availableSlots.isEmpty()) { return null; @@ -989,7 +1313,7 @@ SlotAndLocality poll(ResourceProfile resourceProfile, Iterable returnAllocatedSlot(LogicalSlot slot) { - gateway.returnAllocatedSlot(slot.getSlotRequestId()); - return CompletableFuture.completedFuture(true); + return gateway + .releaseSlot( + slot.getSlotRequestId(), + slot.getSlotSharingGroupId(), + new FlinkException("Slot is being returned to the SlotPool.")) + .thenApply( + (Acknowledge acknowledge) -> true); } @Override @@ -1140,14 +1469,25 @@ public CompletableFuture allocateSlot( boolean allowQueued, Collection preferredLocations) { - final SlotRequestID requestId = new SlotRequestID(); - CompletableFuture slotFuture = gateway.allocateSlot(requestId, task, ResourceProfile.UNKNOWN, preferredLocations, timeout); + final SlotRequestId requestId = new SlotRequestId(); + CompletableFuture slotFuture = gateway.allocateSlot( + requestId, + 
task, + ResourceProfile.UNKNOWN, + preferredLocations, + allowQueued, + timeout); + slotFuture.whenComplete( (LogicalSlot slot, Throwable failure) -> { if (failure != null) { - gateway.cancelSlotRequest(requestId); + gateway.releaseSlot( + requestId, + task.getSlotSharingGroupId(), + failure); } }); + return slotFuture; } } @@ -1155,18 +1495,18 @@ public CompletableFuture allocateSlot( // ------------------------------------------------------------------------ /** - * A pending request for a slot + * A pending request for a slot. */ private static class PendingRequest { - private final SlotRequestID slotRequestId; + private final SlotRequestId slotRequestId; private final ResourceProfile resourceProfile; private final CompletableFuture allocatedSlotFuture; PendingRequest( - SlotRequestID slotRequestId, + SlotRequestId slotRequestId, ResourceProfile resourceProfile) { this.slotRequestId = Preconditions.checkNotNull(slotRequestId); this.resourceProfile = Preconditions.checkNotNull(resourceProfile); @@ -1174,7 +1514,7 @@ private static class PendingRequest { allocatedSlotFuture = new CompletableFuture<>(); } - public SlotRequestID getSlotRequestId() { + public SlotRequestId getSlotRequestId() { return slotRequestId; } @@ -1190,7 +1530,7 @@ public ResourceProfile getResourceProfile() { // ------------------------------------------------------------------------ /** - * A slot, together with the timestamp when it was added + * A slot, together with the timestamp when it was added. 
*/ private static class SlotAndTimestamp { diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPoolGateway.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolGateway.java similarity index 55% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPoolGateway.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolGateway.java index 103bc612920e8..d3b51f7b119bf 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotPoolGateway.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolGateway.java @@ -16,14 +16,17 @@ * limitations under the License. */ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.api.common.time.Time; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; +import org.apache.flink.runtime.instance.SlotSharingGroupId; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.messages.Acknowledge; import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway; import org.apache.flink.runtime.rpc.RpcGateway; @@ -37,7 +40,7 @@ /** * The gateway for calls on the {@link SlotPool}. 
*/ -public interface SlotPoolGateway extends RpcGateway { +public interface SlotPoolGateway extends AllocatedSlotActions, RpcGateway { // ------------------------------------------------------------------------ // shutdown @@ -70,41 +73,87 @@ public interface SlotPoolGateway extends RpcGateway { // registering / un-registering TaskManagers and slots // ------------------------------------------------------------------------ + /** + * Registers a TaskExecutor with the given {@link ResourceID} at {@link SlotPool}. + * + * @param resourceID identifying the TaskExecutor to register + * @return Future acknowledge which is completed after the TaskExecutor has been registered + */ CompletableFuture registerTaskManager(ResourceID resourceID); + /** + * Releases a TaskExecutor with the given {@link ResourceID} from the {@link SlotPool}. + * + * @param resourceID identifying the TaskExecutor which shall be released from the SlotPool + * @return Future acknowledge which is completed after the TaskExecutor has been released + */ CompletableFuture releaseTaskManager(ResourceID resourceID); + /** + * Offers a slot to the {@link SlotPool}. The slot offer can be accepted or + * rejected. + * + * @param taskManagerLocation from which the slot offer originates + * @param taskManagerGateway to talk to the slot offerer + * @param slotOffer slot which is offered to the {@link SlotPool} + * @return True (future) if the slot has been accepted, otherwise false (future) + */ CompletableFuture offerSlot( TaskManagerLocation taskManagerLocation, TaskManagerGateway taskManagerGateway, SlotOffer slotOffer); + /** + * Offers multiple slots to the {@link SlotPool}. The slot offerings can be + * individually accepted or rejected by returning the collection of accepted + * slot offers. 
+ * + * @param taskManagerLocation from which the slot offers originate + * @param taskManagerGateway to talk to the slot offerer + * @param offers slot offers which are offered to the {@link SlotPool} + * @return A collection of accepted slot offers (future). The remaining slot offers are + * implicitly rejected. + */ CompletableFuture> offerSlots( TaskManagerLocation taskManagerLocation, TaskManagerGateway taskManagerGateway, Collection offers); - + + /** + * Fails the slot with the given allocation id. + * + * @param allocationID identifying the slot which is being failed + * @param cause of the failure + */ void failAllocation(AllocationID allocationID, Exception cause); // ------------------------------------------------------------------------ // allocating and disposing slots // ------------------------------------------------------------------------ - CompletableFuture allocateSlot( - SlotRequestID requestId, - ScheduledUnit task, - ResourceProfile resources, - Iterable locationPreferences, - @RpcTimeout Time timeout); - - void returnAllocatedSlot(SlotRequestID slotRequestId); - /** - * Cancel a slot allocation request. + * Requests to allocate a slot for the given {@link ScheduledUnit}. The request + * is uniquely identified by the provided {@link SlotRequestId} which can also + * be used to release the slot via {@link #releaseSlot(SlotRequestId, SlotSharingGroupId, Throwable)}. + * The allocated slot will fulfill the requested {@link ResourceProfile} and + * an attempt is made to place it on one of the preferred locations. + * + *

If the returned future need not be completed right away (i.e. the slot request + * can be queued), allowQueuedScheduling must be set to true. * - * @param slotRequestId identifying the slot allocation request - * @return Future acknowledge if the slot allocation has been cancelled + * @param slotRequestId identifying the requested slot + * @param scheduledUnit for which to allocate a slot + * @param resourceProfile which the allocated slot must fulfill + * @param locationPreferences which define where the allocated slot should be placed, this can also be empty + * @param allowQueuedScheduling true if the slot request can be queued (i.e. the returned future need not be completed right away) + * @param timeout for the operation + * @return Future which is completed with the allocated {@link LogicalSlot} */ - CompletableFuture cancelSlotRequest(SlotRequestID slotRequestId); - + CompletableFuture allocateSlot( + SlotRequestId slotRequestId, + ScheduledUnit scheduledUnit, + ResourceProfile resourceProfile, + Collection locationPreferences, + boolean allowQueuedScheduling, + @RpcTimeout Time timeout); } diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotProvider.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotProvider.java similarity index 94% rename from flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotProvider.java rename to flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotProvider.java index 98427c2f855d8..8e8d019949a26 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/instance/SlotProvider.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotProvider.java @@ -16,8 +16,9 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManager.java b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManager.java new file mode 100644 index 0000000000000..91ffa8d815714 --- /dev/null +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManager.java @@ -0,0 +1,740 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; +import org.apache.flink.util.AbstractID; +import org.apache.flink.util.FlinkException; +import org.apache.flink.util.Preconditions; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; + +import java.util.AbstractCollection; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CompletableFuture; + +/** + * Manager which is responsible for slot sharing. Slot sharing allows to run different + * tasks in the same slot and to realize co-location constraints. + * + *

The SlotSharingManager allows to create a hierarchy of {@link TaskSlot} such that + * every {@link TaskSlot} is uniquely identified by a {@link SlotRequestId} identifying + * the request for the TaskSlot and a {@link AbstractID} identifying the task or the + * co-location constraint running in this slot. + * + *

The {@link TaskSlot} hierarchy is implemented by {@link MultiTaskSlot} and + * {@link SingleTaskSlot}. The former class represents inner nodes which can contain + * a number of other {@link TaskSlot} and the latter class represents the leaf nodes. + * The hierarchy starts with a root {@link MultiTaskSlot} which is assigned a future + * {@link SlotContext}. The {@link SlotContext} represents the allocated slot + * on the TaskExecutor in which all slots of this hierarchy run. A {@link MultiTaskSlot} + * can be assigned multiple {@link SingleTaskSlot} or {@link MultiTaskSlot} if and only if + * the task slot does not yet contain another child with the same {@link AbstractID} identifying + * the actual task or the co-location constraint. + * + *

Normal slot sharing is represented by a root {@link MultiTaskSlot} which contains a set + * of {@link SingleTaskSlot} on the second layer. Each {@link SingleTaskSlot} represents a different + * task. + * + *

Co-location constraints are modeled by adding a {@link MultiTaskSlot} to the root node. The co-location + * constraint is uniquely identified by a {@link AbstractID} such that we cannot add a second co-located + * {@link MultiTaskSlot} to the same root node. Now all co-located tasks will be added to co-located + * multi task slot. + */ +public class SlotSharingManager { + + /** Lock for the internal data structures. */ + private final Object lock = new Object(); + /** Id of the slot sharing group; handed to every SingleLogicalSlot created by a SingleTaskSlot of this manager. */ + private final SlotSharingGroupId slotSharingGroupId; + + /** Actions to release allocated slots after a complete multi task slot hierarchy has been released. */ + private final AllocatedSlotActions allocatedSlotActions; + + /** Owner of the slots to which to return them when they are released from the outside. */ + private final SlotOwner slotOwner; + /** All task slots (root, nested and leaf nodes) keyed by their slot request id. Unlike the root slot maps, it is read and written without holding the lock. */ + private final Map allTaskSlots; + + /** Root nodes which have not been completed because the allocated slot is still pending. */ + @GuardedBy("lock") + private final Map unresolvedRootSlots; + + /** Root nodes which have been completed (the underlying allocated slot has been assigned).
*/ + @GuardedBy("lock") + private final Map> resolvedRootSlots; + /** + * Creates a slot sharing manager with empty task slot registries. None of the arguments may be null. + * + * @param slotSharingGroupId id of the slot sharing group handed to the logical slots created by this manager + * @param allocatedSlotActions used to release the underlying allocated slot once a root slot is released + * @param slotOwner owner handed to the logical slots created by this manager + */ + SlotSharingManager( + SlotSharingGroupId slotSharingGroupId, + AllocatedSlotActions allocatedSlotActions, + SlotOwner slotOwner) { + this.slotSharingGroupId = Preconditions.checkNotNull(slotSharingGroupId); + this.allocatedSlotActions = Preconditions.checkNotNull(allocatedSlotActions); + this.slotOwner = Preconditions.checkNotNull(slotOwner); + + allTaskSlots = new HashMap<>(16); + unresolvedRootSlots = new HashMap<>(16); + resolvedRootSlots = new HashMap<>(16); + } + /** Returns true if no task slot (root, nested or leaf) is registered at this manager. */ + public boolean isEmpty() { + return allTaskSlots.isEmpty(); + } + /** Returns true if a task slot is registered under the given slot request id. */ + public boolean contains(SlotRequestId slotRequestId) { + return allTaskSlots.containsKey(slotRequestId); + } + /** Returns the task slot registered under the given slot request id, or null if there is none. */ + @Nullable + TaskSlot getTaskSlot(SlotRequestId slotRequestId) { + return allTaskSlots.get(slotRequestId); + } + + /** + * Creates a new root slot with the given {@link SlotRequestId}, {@link SlotContext} future and + * the {@link SlotRequestId} of the allocated slot. + * + * The root slot is registered as unresolved first. Once the future completes with a slot context, + * the root slot is moved to the resolved root slots of the slot's TaskManagerLocation; if the + * future completes without a slot context, the root slot is released with the reported throwable. + * + * @param slotRequestId of the root slot + * @param slotContextFuture with which we create the root slot + * @param allocatedSlotRequestId slot request id of the underlying allocated slot which can be used + * to cancel the pending slot request or release the allocated slot + * @return New root slot + */ + MultiTaskSlot createRootSlot( + SlotRequestId slotRequestId, + CompletableFuture slotContextFuture, + SlotRequestId allocatedSlotRequestId) { + final MultiTaskSlot rootMultiTaskSlot = new MultiTaskSlot( + slotRequestId, + slotContextFuture, + allocatedSlotRequestId); + + allTaskSlots.put(slotRequestId, rootMultiTaskSlot); + + synchronized (lock) { + unresolvedRootSlots.put(slotRequestId, rootMultiTaskSlot); + } + + // add the root node to the set of resolved root nodes once the SlotContext future has + // been completed and we know the slot's TaskManagerLocation + slotContextFuture.whenComplete( + (SlotContext slotContext, Throwable throwable) -> { + if (slotContext != null) { +
synchronized (lock) { + final MultiTaskSlot resolvedRootNode = unresolvedRootSlots.remove(slotRequestId); + + if (resolvedRootNode != null) { + final Set innerCollection = resolvedRootSlots.computeIfAbsent( + slotContext.getTaskManagerLocation(), + taskManagerLocation -> new HashSet<>(4)); + + innerCollection.add(resolvedRootNode); + } + } + } else { + rootMultiTaskSlot.release(throwable); + } + }); + + return rootMultiTaskSlot; + } + + /** + * Gets a resolved root slot which does not yet contain the given groupId. First the given set of + * preferred locations is checked. An empty collection of location preferences is treated as + * "no preference". + * + * @param groupId which the returned slot must not contain + * @param locationPreferences specifying which locations are preferred; must not be null + * @return the resolved root slot and its locality with respect to the specified location preferences + * or null if there was no root slot which did not contain the given groupId + */ + @Nullable + MultiTaskSlotLocality getResolvedRootSlot(AbstractID groupId, Collection locationPreferences) { + Preconditions.checkNotNull(locationPreferences); + + final MultiTaskSlotLocality multiTaskSlotLocality; + + if (locationPreferences.isEmpty()) { + multiTaskSlotLocality = getResolvedRootSlotWithoutLocationPreferences(groupId); + } else { + multiTaskSlotLocality = getResolvedRootSlotWithLocationPreferences(groupId, locationPreferences); + } + + return multiTaskSlotLocality; + } + + /** + * Gets a resolved root slot which does not yet contain the given groupId. The method will try to + * find a slot of a TaskManager contained in the collection of preferred locations. If there is no such slot + * with free capacities available, then the method will look for slots of TaskManager which run on the same + * machine as the TaskManager in the collection of preferred locations. If there is no such slot, then any slot + * with free capacities is returned. If there is no such slot, then null is returned.
+ * + * @param groupId which the returned slot must not contain + * @param locationPreferences specifying which locations are preferred + * @return the resolved root slot and its locality wrt to the specified location preferences + * or null if there was not root slot which did not contain the given groupId + */ + @Nullable + private MultiTaskSlotLocality getResolvedRootSlotWithLocationPreferences(AbstractID groupId, Collection locationPreferences) { + Preconditions.checkNotNull(groupId); + Preconditions.checkNotNull(locationPreferences); + final Set hostnameSet = new HashSet<>(16); + MultiTaskSlot nonLocalMultiTaskSlot = null; + + synchronized (lock) { + for (TaskManagerLocation locationPreference : locationPreferences) { + final Set multiTaskSlots = resolvedRootSlots.get(locationPreference); + + if (multiTaskSlots != null) { + for (MultiTaskSlot multiTaskSlot : multiTaskSlots) { + if (!multiTaskSlot.contains(groupId)) { + return MultiTaskSlotLocality.of(multiTaskSlot, Locality.LOCAL); + } + } + + hostnameSet.add(locationPreference.getHostname()); + } + } + + for (Map.Entry> taskManagerLocationSetEntry : resolvedRootSlots.entrySet()) { + if (hostnameSet.contains(taskManagerLocationSetEntry.getKey().getHostname())) { + for (MultiTaskSlot multiTaskSlot : taskManagerLocationSetEntry.getValue()) { + if (!multiTaskSlot.contains(groupId)) { + return MultiTaskSlotLocality.of(multiTaskSlot, Locality.HOST_LOCAL); + } + } + } else if (nonLocalMultiTaskSlot == null) { + for (MultiTaskSlot multiTaskSlot : taskManagerLocationSetEntry.getValue()) { + if (!multiTaskSlot.contains(groupId)) { + nonLocalMultiTaskSlot = multiTaskSlot; + } + } + } + } + } + + if (nonLocalMultiTaskSlot != null) { + return MultiTaskSlotLocality.of(nonLocalMultiTaskSlot, Locality.NON_LOCAL); + } else { + return null; + } + } + + /** + * Gets a resolved slot which does not yet contain the given groupId without any location + * preferences. 
+ * + * @param groupId which the returned slot must not contain + * @return the resolved slot or null if there was no root slot with free capacities + */ + @Nullable + private MultiTaskSlotLocality getResolvedRootSlotWithoutLocationPreferences(AbstractID groupId) { + Preconditions.checkNotNull(groupId); + + synchronized (lock) { + for (Set multiTaskSlots : resolvedRootSlots.values()) { + for (MultiTaskSlot multiTaskSlot : multiTaskSlots) { + if (!multiTaskSlot.contains(groupId)) { + return MultiTaskSlotLocality.of(multiTaskSlot, Locality.UNCONSTRAINED); + } + } + } + } + + return null; + } + + /** + * Gets an unresolved slot which does not yet contain the given groupId. An unresolved + * slot is a slot whose underlying allocated slot has not been allocated yet. + * + * @param groupId which the returned slot must not contain + * @return the unresolved slot or null if there was no root slot with free capacities + */ + @Nullable + MultiTaskSlot getUnresolvedRootSlot(AbstractID groupId) { + synchronized (lock) { + for (MultiTaskSlot multiTaskSlot : unresolvedRootSlots.values()) { + if (!multiTaskSlot.contains(groupId)) { + return multiTaskSlot; + } + } + } + + return null; + } + + // ------------------------------------------------------------------------ + // Inner classes: TaskSlot hierarchy and helper classes + // ------------------------------------------------------------------------ + + /** + * Helper class which contains a {@link MultiTaskSlot} and its {@link Locality}. 
+ */ + static final class MultiTaskSlotLocality { + private final MultiTaskSlot multiTaskSlot; + + private final Locality locality; + + MultiTaskSlotLocality(MultiTaskSlot multiTaskSlot, Locality locality) { + this.multiTaskSlot = Preconditions.checkNotNull(multiTaskSlot); + this.locality = Preconditions.checkNotNull(locality); + } + + MultiTaskSlot getMultiTaskSlot() { + return multiTaskSlot; + } + + public Locality getLocality() { + return locality; + } + + public static MultiTaskSlotLocality of(MultiTaskSlot multiTaskSlot, Locality locality) { + return new MultiTaskSlotLocality(multiTaskSlot, locality); + } + } + + /** + * Base class for all task slots. + */ + public abstract static class TaskSlot { + // every TaskSlot has an associated slot request id + private final SlotRequestId slotRequestId; + + // all task slots except for the root slots have a group id assigned + @Nullable + private final AbstractID groupId; + + TaskSlot(SlotRequestId slotRequestId, @Nullable AbstractID groupId) { + this.slotRequestId = Preconditions.checkNotNull(slotRequestId); + this.groupId = groupId; + } + + public SlotRequestId getSlotRequestId() { + return slotRequestId; + } + + @Nullable + public AbstractID getGroupId() { + return groupId; + } + + /** + * Check whether the task slot contains the given groupId. + * + * @param groupId which to check whether it is contained + * @return true if the task slot contains the given groupId, otherwise false + */ + public boolean contains(AbstractID groupId) { + return Objects.equals(this.groupId, groupId); + } + + /** + * Release the task slot. + * + * @param cause for the release + * @return true if the slot could be released, otherwise false + */ + public abstract boolean release(Throwable cause); + } + + /** + * {@link TaskSlot} implementation which can have multiple other task slots assigned as children. 
+ */ + public final class MultiTaskSlot extends TaskSlot implements AllocatedSlot.Payload { + /** Children of this multi task slot keyed by the group id under which they were allocated. */ + private final Map children; + + // the root node has its parent set to null + @Nullable + private final MultiTaskSlot parent; + + // underlying allocated slot + private final CompletableFuture slotContextFuture; + + // slot request id of the allocated slot; null for nested (non-root) multi task slots + @Nullable + private final SlotRequestId allocatedSlotRequestId; + + // true if we are currently releasing our children + private boolean releasingChildren; + /** + * Creates a nested multi task slot which shares the slot context future of its parent. The parent + * must not be null; no allocated slot request id is assigned. + */ + private MultiTaskSlot( + SlotRequestId slotRequestId, + AbstractID groupId, + MultiTaskSlot parent) { + this( + slotRequestId, + groupId, + Preconditions.checkNotNull(parent), + parent.getSlotContextFuture(), + null); + } + /** + * Creates a root multi task slot (no group id, no parent) on top of the given slot context future. + */ + private MultiTaskSlot( + SlotRequestId slotRequestId, + CompletableFuture slotContextFuture, + SlotRequestId allocatedSlotRequestId) { + this( + slotRequestId, + null, + null, + slotContextFuture, + allocatedSlotRequestId); + } + /** + * Common constructor. Registers a callback on the slot context future which releases this slot + * if the future completes with a throwable. + */ + private MultiTaskSlot( + SlotRequestId slotRequestId, + @Nullable AbstractID groupId, + MultiTaskSlot parent, + CompletableFuture slotContextFuture, + SlotRequestId allocatedSlotRequestId) { + super(slotRequestId, groupId); + + this.parent = parent; + this.slotContextFuture = Preconditions.checkNotNull(slotContextFuture); + this.allocatedSlotRequestId = allocatedSlotRequestId; + + this.children = new HashMap<>(16); + this.releasingChildren = false; + + slotContextFuture.whenComplete( + (SlotContext ignored, Throwable throwable) -> { + if (throwable != null) { + release(throwable); + } + }); + } + /** Returns the slot context future of this slot; nested slots return the future of their parent. */ + CompletableFuture getSlotContextFuture() { + return slotContextFuture; + } + + /** + * Allocates a MultiTaskSlot and registers it under the given groupId at + * this MultiTaskSlot.
+ * + * @param slotRequestId of the new multi task slot + * @param groupId under which the new multi task slot is registered + * @return the newly allocated MultiTaskSlot + */ + MultiTaskSlot allocateMultiTaskSlot(SlotRequestId slotRequestId, AbstractID groupId) { + Preconditions.checkState(!super.contains(groupId)); + + final MultiTaskSlot inner = new MultiTaskSlot( + slotRequestId, + groupId, + this); + + children.put(groupId, inner); + + // register the newly allocated slot also at the SlotSharingManager + allTaskSlots.put(slotRequestId, inner); + + return inner; + } + + /** + * Allocates a {@link SingleTaskSlot} and registers it under the given groupId at + * this MultiTaskSlot. + * + * @param slotRequestId of the new single task slot + * @param groupId under which the new single task slot is registered + * @param locality of the allocation + * @return the newly allocated {@link SingleTaskSlot} + */ + SingleTaskSlot allocateSingleTaskSlot( + SlotRequestId slotRequestId, + AbstractID groupId, + Locality locality) { + Preconditions.checkState(!super.contains(groupId)); + + final SingleTaskSlot leaf = new SingleTaskSlot( + slotRequestId, + groupId, + this, + locality); + + children.put(groupId, leaf); + + // register the newly allocated slot also at the SlotSharingManager + allTaskSlots.put(slotRequestId, leaf); + + return leaf; + } + + /** + * Checks whether this slot or any of its children contains the given groupId. 
+ * + * @param groupId which to check whether it is contained + * @return true if this or any of its children contains the given groupId, otherwise false + */ + @Override + public boolean contains(AbstractID groupId) { + if (super.contains(groupId)) { + return true; + } else { + for (TaskSlot taskSlot : children.values()) { + if (taskSlot.contains(groupId)) { + return true; + } + } + + return false; + } + } + /** + * Releases all children of this slot. Children which could be released are removed from this slot + * and from the SlotSharingManager. If no child is left afterwards, a nested slot asks its parent to + * release it, whereas a root slot deregisters itself from the SlotSharingManager and releases the + * underlying allocated slot via the AllocatedSlotActions. While the children are being released, + * releaseChild calls from the children are ignored (releasingChildren flag). + * + * @param cause for the release + * @return true if no child is left after releasing the children, otherwise false + */ + @Override + public boolean release(Throwable cause) { + releasingChildren = true; + + // first release all children and remove them if they could be released immediately + children.values().removeIf(node -> { + boolean release = node.release(cause); + + if (release) { + allTaskSlots.remove(node.getSlotRequestId()); + } + + return release; + }); + + releasingChildren = false; + + if (children.isEmpty()) { + if (parent != null) { + // we remove ourselves from our parent if we no longer have children + parent.releaseChild(getGroupId()); + } else { + // we are the root node --> remove the root node from the list of task slots + allTaskSlots.remove(getSlotRequestId()); + + if (!slotContextFuture.isDone() || slotContextFuture.isCompletedExceptionally()) { + synchronized (lock) { + // the root node should still be unresolved + unresolvedRootSlots.remove(getSlotRequestId()); + } + } else { + // the root node should be resolved --> we can access the slot context + final SlotContext slotContext = slotContextFuture.getNow(null); + + if (slotContext != null) { + synchronized (lock) { + final Set multiTaskSlots = resolvedRootSlots.get(slotContext.getTaskManagerLocation()); + + if (multiTaskSlots != null) { + multiTaskSlots.remove(this); + + if (multiTaskSlots.isEmpty()) { + resolvedRootSlots.remove(slotContext.getTaskManagerLocation()); + } + } + } + } + } + + // release the underlying allocated slot + allocatedSlotActions.releaseSlot(allocatedSlotRequestId, null, cause); + } + + return true; + } else { + return false; + } + } + + /** + * Releases the child with the
given childGroupId. + * + * @param childGroupId identifying the child to release + */ + private void releaseChild(AbstractID childGroupId) { + if (!releasingChildren) { + TaskSlot child = children.remove(childGroupId); + + if (child != null) { + allTaskSlots.remove(child.getSlotRequestId()); + } + + if (children.isEmpty()) { + release(new FlinkException("Release multi task slot because all children have been released.")); + } + } + } + } + + /** + * {@link TaskSlot} implementation which harbours a {@link LogicalSlot}. The {@link SingleTaskSlot} + * cannot have any children assigned. + */ + public final class SingleTaskSlot extends TaskSlot { + private final MultiTaskSlot parent; + + // future containing a LogicalSlot which is completed once the underlying SlotContext future is completed + private final CompletableFuture logicalSlotFuture; + /** + * Creates a leaf slot below the given parent. The logical slot future is derived from the parent's + * slot context future: once a slot context is available, a SingleLogicalSlot is created from it. + * Neither parent nor locality may be null. + */ + private SingleTaskSlot( + SlotRequestId slotRequestId, + AbstractID groupId, + MultiTaskSlot parent, + Locality locality) { + super(slotRequestId, groupId); + + this.parent = Preconditions.checkNotNull(parent); + + Preconditions.checkNotNull(locality); + logicalSlotFuture = parent.getSlotContextFuture() + .thenApply( + (SlotContext slotContext) -> + new SingleLogicalSlot( + slotRequestId, + slotContext, + slotSharingGroupId, + locality, + slotOwner)); + } + /** Returns the future of the logical slot harboured by this task slot. */ + CompletableFuture getLogicalSlotFuture() { + return logicalSlotFuture; + } + /** + * Fails the logical slot future with the given cause (this has no effect if the future is already + * completed). If the future had completed normally and the logical slot is still alive, the logical + * slot is released first. Unless a logical slot release is considered pending, the parent is asked + * to release this child. + * + * NOTE(review): pendingLogicalSlotRelease is assigned isDone() of the future returned by + * releaseSlot, i.e. it is true when that future has already completed - confirm that this + * polarity is intended. + * + * @param cause for the release + * @return true if no logical slot release is considered pending, otherwise false + */ + @Override + public boolean release(Throwable cause) { + logicalSlotFuture.completeExceptionally(cause); + + boolean pendingLogicalSlotRelease = false; + + if (logicalSlotFuture.isDone() && !logicalSlotFuture.isCompletedExceptionally()) { + // we have a single task slot which we first have to release + final LogicalSlot logicalSlot = logicalSlotFuture.getNow(null); + + if ((logicalSlot != null) && (logicalSlot.isAlive())) { + pendingLogicalSlotRelease = logicalSlot.releaseSlot(cause).isDone(); + } + } + + if (!pendingLogicalSlotRelease) { + parent.releaseChild(getGroupId()); + } + + return
!pendingLogicalSlotRelease; + } + } + + // ------------------------------------------------------------------------ + // Methods and classes for testing + // ------------------------------------------------------------------------ + + /** + * Returns a collection of all resolved root slots. + * + * @return Collection of all resolved root slots + */ + @VisibleForTesting + public Collection getResolvedRootSlots() { + return new ResolvedRootSlotValues(); + } + /** + * Returns the unresolved root slots. + * + * NOTE(review): the result is the values() view of the internal map and is used by the caller + * after the lock has been left again - confirm that callers do not rely on it concurrently. + */ + @VisibleForTesting + Collection getUnresolvedRootSlots() { + synchronized (lock) { + return unresolvedRootSlots.values(); + } + } + + /** + * Collection of all resolved {@link MultiTaskSlot} root slots. The lock is held while the iterator + * is created and while the size is computed, but not while the returned iterator is traversed. + */ + private final class ResolvedRootSlotValues extends AbstractCollection { + + @Override + public Iterator iterator() { + synchronized (lock) { + return new ResolvedRootSlotIterator(resolvedRootSlots.values().iterator()); + } + } + + @Override + public int size() { + int numberResolvedMultiTaskSlots = 0; + + synchronized (lock) { + for (Set multiTaskSlots : resolvedRootSlots.values()) { + numberResolvedMultiTaskSlots += multiTaskSlots.size(); + } + } + + return numberResolvedMultiTaskSlots; + } + } + + /** + * Iterator over all resolved {@link MultiTaskSlot} root slots.
+ */ + private static final class ResolvedRootSlotIterator implements Iterator { + private final Iterator> baseIterator; + private Iterator currentIterator; + + private ResolvedRootSlotIterator(Iterator> baseIterator) { + this.baseIterator = Preconditions.checkNotNull(baseIterator); + + if (baseIterator.hasNext()) { + currentIterator = baseIterator.next().iterator(); + } else { + currentIterator = Collections.emptyIterator(); + } + } + + @Override + public boolean hasNext() { + progressToNextElement(); + + return currentIterator.hasNext(); + } + + @Override + public MultiTaskSlot next() { + progressToNextElement(); + + return currentIterator.next(); + } + + private void progressToNextElement() { + while (baseIterator.hasNext() && !currentIterator.hasNext()) { + currentIterator = baseIterator.next().iterator(); + } + } + } +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointSettingsSerializableTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointSettingsSerializableTest.java index 7e85167773dc3..e98efc23f0ab5 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointSettingsSerializableTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/checkpoint/CheckpointSettingsSerializableTest.java @@ -31,7 +31,7 @@ import org.apache.flink.runtime.executiongraph.ExecutionGraph; import org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder; import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobgraph.tasks.ExternalizedCheckpointSettings; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptorTest.java 
b/flink-runtime/src/test/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptorTest.java index fc2c06f0127cd..6aa36b70ab889 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptorTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/deployment/InputChannelDeploymentDescriptorTest.java @@ -27,10 +27,10 @@ import org.apache.flink.runtime.executiongraph.ExecutionVertex; import org.apache.flink.runtime.executiongraph.IntermediateResult; import org.apache.flink.runtime.executiongraph.IntermediateResultPartition; -import org.apache.flink.runtime.instance.LogicalSlot; import org.apache.flink.runtime.io.network.ConnectionID; import org.apache.flink.runtime.io.network.partition.ResultPartitionID; import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.junit.Test; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ArchivedExecutionGraphTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ArchivedExecutionGraphTest.java index 0d7c8e6ac90f8..7b9d9aa5fde16 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ArchivedExecutionGraphTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ArchivedExecutionGraphTest.java @@ -36,7 +36,7 @@ import org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore; import org.apache.flink.runtime.execution.ExecutionState; import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; diff --git 
a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphDeploymentTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphDeploymentTest.java index 16da8e623eb1f..e869625f7f2c3 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphDeploymentTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphDeploymentTest.java @@ -40,7 +40,7 @@ import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; @@ -54,7 +54,7 @@ import org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings; import org.apache.flink.runtime.jobmanager.scheduler.Scheduler; import org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.operators.BatchTask; import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; import org.apache.flink.runtime.taskmanager.TaskExecutionState; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphMetricsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphMetricsTest.java index 92c7c61d7477f..caf89e8b23401 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphMetricsTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphMetricsTest.java @@ -27,8 
+27,8 @@ import org.apache.flink.runtime.executiongraph.metrics.RestartTimeGauge; import org.apache.flink.runtime.executiongraph.restart.RestartCallback; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; -import org.apache.flink.runtime.instance.LogicalSlot; -import org.apache.flink.runtime.instance.TestingLogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.TestingLogicalSlot; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphRestartTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphRestartTest.java index 80df8526313e5..2245a8c772129 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphRestartTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphRestartTest.java @@ -41,7 +41,7 @@ import org.apache.flink.runtime.instance.HardwareDescription; import org.apache.flink.runtime.instance.Instance; import org.apache.flink.runtime.instance.InstanceID; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; import org.apache.flink.runtime.jobgraph.JobGraph; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSchedulingTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSchedulingTest.java index 18e6cf1b7fa2a..f75cb4bc06c3d 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSchedulingTest.java +++ 
b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSchedulingTest.java @@ -28,18 +28,17 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor; import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; -import org.apache.flink.runtime.instance.SlotProvider; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.ScheduleMode; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.instance.SimpleSlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.jobmanager.slots.TestingSlotOwner; import org.apache.flink.runtime.messages.Acknowledge; @@ -448,7 +447,6 @@ private SimpleSlot createSlot(TaskManagerGateway taskManager, JobID jobId, SlotO ResourceID.generate(), InetAddress.getLoopbackAddress(), 12345); SimpleSlotContext slot = new SimpleSlotContext( - new SlotRequestID(), new AllocationID(), location, 0, diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSuspendTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSuspendTest.java index 65a52bce5f189..b5a29c3eb1173 100644 --- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSuspendTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphSuspendTest.java @@ -25,7 +25,7 @@ import org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; import org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphTestUtils.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphTestUtils.java index c97329fa7f076..b1ee3cc1c7de3 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphTestUtils.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionGraphTestUtils.java @@ -41,16 +41,15 @@ import org.apache.flink.runtime.instance.Instance; import org.apache.flink.runtime.instance.InstanceID; import org.apache.flink.runtime.instance.SimpleSlot; -import org.apache.flink.runtime.instance.SlotProvider; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable; import org.apache.flink.runtime.jobmanager.scheduler.Scheduler; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import 
org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.instance.SimpleSlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.messages.Acknowledge; import org.apache.flink.runtime.messages.TaskMessages.CancelTask; @@ -245,7 +244,6 @@ public static SimpleSlot createMockSimpleSlot(TaskManagerGateway gateway) { ResourceID.generate(), InetAddress.getLoopbackAddress(), 6572); final SimpleSlotContext allocatedSlot = new SimpleSlotContext( - new SlotRequestID(), new AllocationID(), location, 0, diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionTest.java index e3fd0df618ebe..46dfd4128c63b 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionTest.java @@ -22,13 +22,13 @@ import org.apache.flink.runtime.execution.ExecutionState; import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobmanager.scheduler.LocationPreferenceConstraint; import org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import 
org.apache.flink.runtime.testingUtils.TestingUtils; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexDeploymentTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexDeploymentTest.java index 63cebf3a5c265..d91380ed275bf 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexDeploymentTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexDeploymentTest.java @@ -25,14 +25,14 @@ import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor; import org.apache.flink.runtime.execution.ExecutionState; import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.IntermediateDataSetID; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobgraph.ScheduleMode; import org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotContext; import org.apache.flink.runtime.testingUtils.TestingUtils; import org.apache.flink.runtime.testutils.DirectScheduledExecutorService; import org.apache.flink.util.TestLogger; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexLocalityTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexLocalityTest.java index bffbb6a8e9d7c..274df5947c454 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexLocalityTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexLocalityTest.java @@ -30,16 +30,15 
@@ import org.apache.flink.runtime.execution.ExecutionState; import org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy; import org.apache.flink.runtime.instance.SimpleSlot; -import org.apache.flink.runtime.instance.SlotProvider; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.instance.SimpleSlotContext; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.runtime.testingUtils.TestingUtils; @@ -235,7 +234,6 @@ private void initializeLocation(ExecutionVertex vertex, TaskManagerLocation loca // - exposing test methods in the ExecutionVertex leads to undesirable setters SlotContext slot = new SimpleSlotContext( - new SlotRequestID(), new AllocationID(), location, 0, diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexSchedulingTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexSchedulingTest.java index 9310912caab0e..25e1207aff629 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexSchedulingTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ExecutionVertexSchedulingTest.java @@ -59,7 +59,7 
@@ public void testSlotReleasedWhenScheduledImmediately() { final Instance instance = getInstance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE)); final SimpleSlot slot = instance.allocateSimpleSlot(); - slot.releaseInstanceSlot(); + slot.releaseSlot(); assertTrue(slot.isReleased()); Scheduler scheduler = mock(Scheduler.class); @@ -91,7 +91,7 @@ public void testSlotReleasedWhenScheduledQueued() { final Instance instance = getInstance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE)); final SimpleSlot slot = instance.allocateSimpleSlot(); - slot.releaseInstanceSlot(); + slot.releaseSlot(); assertTrue(slot.isReleased()); final CompletableFuture future = new CompletableFuture<>(); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/FailoverRegionTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/FailoverRegionTest.java index 4d53e678f0111..c411393990cd9 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/FailoverRegionTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/FailoverRegionTest.java @@ -31,7 +31,7 @@ import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider; import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; import org.apache.flink.runtime.jobgraph.JobStatus; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/IndividualRestartsConcurrencyTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/IndividualRestartsConcurrencyTest.java index 32ccad1f8ba51..472529679ec7d 100644 --- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/IndividualRestartsConcurrencyTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/IndividualRestartsConcurrencyTest.java @@ -36,7 +36,7 @@ import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/LegacyJobVertexIdTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/LegacyJobVertexIdTest.java index b5a67fdb9682c..49a6dce0059f4 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/LegacyJobVertexIdTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/LegacyJobVertexIdTest.java @@ -22,7 +22,7 @@ import org.apache.flink.api.common.time.Time; import org.apache.flink.configuration.Configuration; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobVertex; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobgraph.OperatorID; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/PipelinedRegionFailoverConcurrencyTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/PipelinedRegionFailoverConcurrencyTest.java index 656c372786a3e..06647cc1de28c 100644 --- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/PipelinedRegionFailoverConcurrencyTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/PipelinedRegionFailoverConcurrencyTest.java @@ -30,7 +30,7 @@ import org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy; import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; import org.apache.flink.runtime.executiongraph.utils.SimpleSlotProvider; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.runtime.jobgraph.JobStatus; import org.apache.flink.runtime.jobgraph.JobVertex; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ProgrammedSlotProvider.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ProgrammedSlotProvider.java index 24affadf8a254..f44626d2a2263 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ProgrammedSlotProvider.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/ProgrammedSlotProvider.java @@ -18,8 +18,8 @@ package org.apache.flink.runtime.executiongraph; -import org.apache.flink.runtime.instance.LogicalSlot; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/failover/PipelinedFailoverRegionBuildingTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/failover/PipelinedFailoverRegionBuildingTest.java index 4709bcee96596..f94959d93a4ca 100644 --- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/failover/PipelinedFailoverRegionBuildingTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/failover/PipelinedFailoverRegionBuildingTest.java @@ -30,7 +30,7 @@ import org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder; import org.apache.flink.runtime.executiongraph.ExecutionVertex; import org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy; -import org.apache.flink.runtime.instance.SlotProvider; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.io.network.partition.ResultPartitionType; import org.apache.flink.runtime.jobgraph.DistributionPattern; import org.apache.flink.runtime.jobgraph.JobGraph; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/utils/SimpleSlotProvider.java b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/utils/SimpleSlotProvider.java index 82953d6ff62b7..bffdab609caa9 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/utils/SimpleSlotProvider.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/executiongraph/utils/SimpleSlotProvider.java @@ -22,16 +22,15 @@ import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.concurrent.FutureUtils; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.instance.SimpleSlot; import org.apache.flink.runtime.instance.Slot; -import org.apache.flink.runtime.instance.SlotProvider; -import org.apache.flink.runtime.instance.SlotRequestID; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; import org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; 
-import org.apache.flink.runtime.jobmanager.slots.SlotContext; -import org.apache.flink.runtime.jobmanager.slots.SimpleSlotContext; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; +import org.apache.flink.runtime.instance.SimpleSlotContext; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.Preconditions; @@ -63,7 +62,6 @@ public SimpleSlotProvider(JobID jobId, int numSlots, TaskManagerGateway taskMana for (int i = 0; i < numSlots; i++) { SimpleSlotContext as = new SimpleSlotContext( - new SlotRequestID(), new AllocationID(), new TaskManagerLocation(ResourceID.generate(), InetAddress.getLoopbackAddress(), 10000 + i), 0, diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/InstanceTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/InstanceTest.java index 229237da41074..6d7d1ae7f597c 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/InstanceTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/InstanceTest.java @@ -84,10 +84,10 @@ public void testAllocatingAndCancellingSlots() { } // release the slots. 
this returns them to the instance - slot1.releaseInstanceSlot(); - slot2.releaseInstanceSlot(); - slot3.releaseInstanceSlot(); - slot4.releaseInstanceSlot(); + slot1.releaseSlot(); + slot2.releaseSlot(); + slot3.releaseSlot(); + slot4.releaseSlot(); assertEquals(4, instance.getNumberOfAvailableSlots()); assertEquals(0, instance.getNumberOfAllocatedSlots()); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SharedSlotsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SharedSlotsTest.java index 5104e48d6a4ee..860100a75eb0a 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SharedSlotsTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SharedSlotsTest.java @@ -83,7 +83,7 @@ public void allocateAndReleaseEmptySlot() { assertEquals(0, slot.getRootSlotNumber()); // release the slot immediately. - slot.releaseInstanceSlot(); + slot.releaseSlot(); assertTrue(slot.isCanceled()); assertTrue(slot.isReleased()); @@ -202,7 +202,7 @@ public void allocateSimpleSlotsAndReleaseFromRoot() { assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(vid4)); // release from the root. - sharedSlot.releaseInstanceSlot(); + sharedSlot.releaseSlot(); assertTrue(sharedSlot.isReleased()); assertTrue(sub1.isReleased()); @@ -261,7 +261,7 @@ public void allocateSimpleSlotsAndReleaseFromLeaves() { // release from the leaves. 
- sub2.releaseInstanceSlot(); + sub2.releaseSlot(); assertTrue(sharedSlot.isAlive()); assertTrue(sub1.isAlive()); @@ -276,7 +276,7 @@ public void allocateSimpleSlotsAndReleaseFromLeaves() { assertEquals(2, sharedSlot.getNumberLeaves()); - sub1.releaseInstanceSlot(); + sub1.releaseSlot(); assertTrue(sharedSlot.isAlive()); assertTrue(sub1.isReleased()); @@ -290,7 +290,7 @@ public void allocateSimpleSlotsAndReleaseFromLeaves() { assertEquals(1, sharedSlot.getNumberLeaves()); - sub3.releaseInstanceSlot(); + sub3.releaseSlot(); assertTrue(sharedSlot.isReleased()); assertTrue(sub1.isReleased()); @@ -344,7 +344,7 @@ public void allocateAndReleaseInMixedOrder() { assertEquals(1, assignment.getNumberOfSlots()); - sub2.releaseInstanceSlot(); + sub2.releaseSlot(); assertEquals(1, sharedSlot.getNumberLeaves()); assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(vid1)); @@ -362,8 +362,8 @@ public void allocateAndReleaseInMixedOrder() { assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(vid3)); assertEquals(1, assignment.getNumberOfSlots()); - sub3.releaseInstanceSlot(); - sub1.releaseInstanceSlot(); + sub3.releaseSlot(); + sub1.releaseSlot(); assertTrue(sharedSlot.isReleased()); assertEquals(0, sharedSlot.getNumberLeaves()); @@ -439,7 +439,7 @@ public void testAllocateAndReleaseTwoLevels() { assertFalse(constraint.isAssigned()); // we do not immediately lock the location - headSlot.releaseInstanceSlot(); + headSlot.releaseSlot(); assertEquals(1, sharedSlot.getNumberLeaves()); assertNotNull(constraint.getSharedSlot()); @@ -464,8 +464,8 @@ public void testAllocateAndReleaseTwoLevels() { assertEquals(4, sharedSlot.getNumberLeaves()); // we release our co-location constraint tasks - headSlot.releaseInstanceSlot(); - tailSlot.releaseInstanceSlot(); + headSlot.releaseSlot(); + tailSlot.releaseSlot(); assertEquals(2, sharedSlot.getNumberLeaves()); assertTrue(headSlot.isReleased()); @@ -497,10 +497,10 @@ public void testAllocateAndReleaseTwoLevels() { 
assertEquals(constraint.getGroupId(), constraint.getSharedSlot().getGroupID()); // release all - sourceSlot.releaseInstanceSlot(); - headSlot.releaseInstanceSlot(); - tailSlot.releaseInstanceSlot(); - sinkSlot.releaseInstanceSlot(); + sourceSlot.releaseSlot(); + headSlot.releaseSlot(); + tailSlot.releaseSlot(); + sinkSlot.releaseSlot(); assertTrue(sharedSlot.isReleased()); assertTrue(sourceSlot.isReleased()); @@ -573,10 +573,10 @@ public void testReleaseTwoLevelsFromRoot() { assertEquals(4, sharedSlot.getNumberLeaves()); // release all - sourceSlot.releaseInstanceSlot(); - headSlot.releaseInstanceSlot(); - tailSlot.releaseInstanceSlot(); - sinkSlot.releaseInstanceSlot(); + sourceSlot.releaseSlot(); + headSlot.releaseSlot(); + tailSlot.releaseSlot(); + sinkSlot.releaseSlot(); assertTrue(sharedSlot.isReleased()); assertTrue(sourceSlot.isReleased()); @@ -613,7 +613,7 @@ public void testImmediateReleaseOneLevel() { SharedSlot sharedSlot = instance.allocateSharedSlot(assignment); SimpleSlot sub = assignment.addSharedSlotAndAllocateSubSlot(sharedSlot, Locality.UNCONSTRAINED, vid); - sub.releaseInstanceSlot(); + sub.releaseSlot(); assertTrue(sub.isReleased()); assertTrue(sharedSlot.isReleased()); @@ -648,7 +648,7 @@ public void testImmediateReleaseTwoLevel() { assertNull(sub.getGroupID()); assertEquals(constraint.getSharedSlot(), sub.getParent()); - sub.releaseInstanceSlot(); + sub.releaseSlot(); assertTrue(sub.isReleased()); assertTrue(sharedSlot.isReleased()); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SimpleSlotTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SimpleSlotTest.java index 6d572ad3dd61f..de2ae41337799 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SimpleSlotTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SimpleSlotTest.java @@ -20,6 +20,7 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import 
org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway; +import org.apache.flink.runtime.jobmaster.TestingPayload; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.TestLogger; @@ -43,7 +44,7 @@ public void testStateTransitions() { SimpleSlot slot = getSlot(); assertTrue(slot.isAlive()); - slot.releaseInstanceSlot(); + slot.releaseSlot(); assertFalse(slot.isAlive()); assertTrue(slot.isCanceled()); assertTrue(slot.isReleased()); @@ -111,7 +112,7 @@ public void testSetExecutionVertex() { // assign to released { SimpleSlot slot = getSlot(); - slot.releaseInstanceSlot(); + slot.releaseSlot(); assertFalse(slot.tryAssignPayload(payload1)); assertNull(slot.getPayload()); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignmentTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignmentTest.java index 28cab725a7a40..2407c1df01906 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignmentTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotSharingGroupAssignmentTest.java @@ -21,8 +21,8 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobmanager.scheduler.Locality; -import org.apache.flink.runtime.jobmanager.slots.SlotOwner; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.SlotOwner; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.TestLogger; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraintTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraintTest.java index d40ff6173c1a1..77d162f4d39b7 100644 --- 
a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraintTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/CoLocationConstraintTest.java @@ -143,7 +143,7 @@ public void testAssignSlotAndLockLocation() { assertEquals(instance2.getTaskManagerLocation(), constraint.getLocation()); // release the slot - slot2_1.releaseInstanceSlot(); + slot2_1.releaseSlot(); // we should still have a location assertTrue(constraint.isAssigned()); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduleWithCoLocationHintTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduleWithCoLocationHintTest.java index 08db591885f49..19f4a2ff0674b 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduleWithCoLocationHintTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/ScheduleWithCoLocationHintTest.java @@ -19,19 +19,17 @@ package org.apache.flink.runtime.jobmanager.scheduler; import org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; -import org.apache.flink.runtime.testingUtils.TestingUtils; -import org.apache.flink.util.TestLogger; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.util.Collections; import java.util.concurrent.ExecutionException; -import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance; import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getTestVertex; import static 
org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getTestVertexWithLocation; import static org.junit.Assert.assertEquals; @@ -39,606 +37,505 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -public class ScheduleWithCoLocationHintTest extends TestLogger { +@RunWith(Parameterized.class) +public class ScheduleWithCoLocationHintTest extends SchedulerTestBase { - @Test - public void scheduleAllSharedAndCoLocated() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - - assertEquals(6, scheduler.getNumberOfAvailableSlots()); - - SlotSharingGroup sharingGroup = new SlotSharingGroup(); - - CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint c1 = new CoLocationConstraint(ccg); - CoLocationConstraint c2 = new CoLocationConstraint(ccg); - CoLocationConstraint c3 = new CoLocationConstraint(ccg); - CoLocationConstraint c4 = new CoLocationConstraint(ccg); - CoLocationConstraint c5 = new CoLocationConstraint(ccg); - CoLocationConstraint c6 = new CoLocationConstraint(ccg); - - // schedule 4 tasks from the first vertex group - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 6), sharingGroup, c1), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 6), sharingGroup, c2), false, Collections.emptyList()).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 6), sharingGroup, c3), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 6), sharingGroup, c4), false, Collections.emptyList()).get(); - LogicalSlot s5 = 
scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 6), sharingGroup, c1), false, Collections.emptyList()).get(); - LogicalSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 6), sharingGroup, c2), false, Collections.emptyList()).get(); - LogicalSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 6), sharingGroup, c3), false, Collections.emptyList()).get(); - LogicalSlot s8 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 6), sharingGroup, c5), false, Collections.emptyList()).get(); - LogicalSlot s9 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 5, 6), sharingGroup, c6), false, Collections.emptyList()).get(); - LogicalSlot s10 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 6), sharingGroup, c4), false, Collections.emptyList()).get(); - LogicalSlot s11 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 6), sharingGroup, c5), false, Collections.emptyList()).get(); - LogicalSlot s12 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 6), sharingGroup, c6), false, Collections.emptyList()).get(); - - assertNotNull(s1); - assertNotNull(s2); - assertNotNull(s3); - assertNotNull(s4); - assertNotNull(s5); - assertNotNull(s6); - assertNotNull(s7); - assertNotNull(s8); - assertNotNull(s9); - assertNotNull(s10); - assertNotNull(s11); - assertNotNull(s12); - - // check that each slot got exactly two tasks - assertEquals(s1.getTaskManagerLocation(), s5.getTaskManagerLocation()); - assertEquals(s2.getTaskManagerLocation(), s6.getTaskManagerLocation()); - assertEquals(s3.getTaskManagerLocation(), s7.getTaskManagerLocation()); - assertEquals(s4.getTaskManagerLocation(), s10.getTaskManagerLocation()); - assertEquals(s8.getTaskManagerLocation(), s11.getTaskManagerLocation()); - assertEquals(s9.getTaskManagerLocation(), s12.getTaskManagerLocation()); - - assertEquals(c1.getLocation(), s1.getTaskManagerLocation()); - assertEquals(c2.getLocation(), 
s2.getTaskManagerLocation()); - assertEquals(c3.getLocation(), s3.getTaskManagerLocation()); - assertEquals(c4.getLocation(), s4.getTaskManagerLocation()); - assertEquals(c5.getLocation(), s8.getTaskManagerLocation()); - assertEquals(c6.getLocation(), s9.getTaskManagerLocation()); - - // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - - // the first assignments are unconstrained, co.-scheduling is constrained - assertEquals(6, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(6, scheduler.getNumberOfUnconstrainedAssignments()); - - // release some slots, be sure that new available ones come up - s1.releaseSlot(); - s2.releaseSlot(); - s3.releaseSlot(); - s4.releaseSlot(); - s7.releaseSlot(); - s10.releaseSlot(); - s11.releaseSlot(); - s12.releaseSlot(); - assertTrue(scheduler.getNumberOfAvailableSlots() >= 1); - - LogicalSlot single = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(new JobVertexID(), 0, 1)), false, Collections.emptyList()).get(); - assertNotNull(single); - - s1.releaseSlot(); - s2.releaseSlot(); - s3.releaseSlot(); - s5.releaseSlot(); - s6.releaseSlot(); - s7.releaseSlot(); - s8.releaseSlot(); - s9.releaseSlot(); - s11.releaseSlot(); - s12.releaseSlot(); - - assertEquals(5, scheduler.getNumberOfAvailableSlots()); - - assertEquals(6, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(7, scheduler.getNumberOfUnconstrainedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + public ScheduleWithCoLocationHintTest(SchedulerType schedulerType) { + super(schedulerType); } @Test - public void scheduleWithIntermediateRelease() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - JobVertexID jid3 = new JobVertexID(); - JobVertexID jid4 = new JobVertexID(); - - Scheduler 
scheduler = new Scheduler(TestingUtils.directExecutionContext()); + public void scheduleAllSharedAndCoLocated() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(2); + + assertEquals(6, testingSlotProvider.getNumberOfAvailableSlots()); + + SlotSharingGroup sharingGroup = new SlotSharingGroup(); + + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint c1 = new CoLocationConstraint(ccg); + CoLocationConstraint c2 = new CoLocationConstraint(ccg); + CoLocationConstraint c3 = new CoLocationConstraint(ccg); + CoLocationConstraint c4 = new CoLocationConstraint(ccg); + CoLocationConstraint c5 = new CoLocationConstraint(ccg); + CoLocationConstraint c6 = new CoLocationConstraint(ccg); + + // schedule 4 tasks from the first vertex group + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c2), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c3), false, Collections.emptyList()).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c4), false, Collections.emptyList()).get(); + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + LogicalSlot s6 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 6, sharingGroup), 
sharingGroup.getSlotSharingGroupId(), c2), false, Collections.emptyList()).get(); + LogicalSlot s7 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c3), false, Collections.emptyList()).get(); + LogicalSlot s8 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c5), false, Collections.emptyList()).get(); + LogicalSlot s9 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 5, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c6), false, Collections.emptyList()).get(); + LogicalSlot s10 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c4), false, Collections.emptyList()).get(); + LogicalSlot s11 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c5), false, Collections.emptyList()).get(); + LogicalSlot s12 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 6, sharingGroup), sharingGroup.getSlotSharingGroupId(), c6), false, Collections.emptyList()).get(); + + assertNotNull(s1); + assertNotNull(s2); + assertNotNull(s3); + assertNotNull(s4); + assertNotNull(s5); + assertNotNull(s6); + assertNotNull(s7); + assertNotNull(s8); + assertNotNull(s9); + assertNotNull(s10); + assertNotNull(s11); + assertNotNull(s12); + + // check that each slot got exactly two tasks + assertEquals(s1.getTaskManagerLocation(), s5.getTaskManagerLocation()); + assertEquals(s2.getTaskManagerLocation(), s6.getTaskManagerLocation()); + assertEquals(s3.getTaskManagerLocation(), s7.getTaskManagerLocation()); + assertEquals(s4.getTaskManagerLocation(), s10.getTaskManagerLocation()); + assertEquals(s8.getTaskManagerLocation(), s11.getTaskManagerLocation()); + assertEquals(s9.getTaskManagerLocation(), s12.getTaskManagerLocation()); + 
+ assertEquals(c1.getLocation(), s1.getTaskManagerLocation()); + assertEquals(c2.getLocation(), s2.getTaskManagerLocation()); + assertEquals(c3.getLocation(), s3.getTaskManagerLocation()); + assertEquals(c4.getLocation(), s4.getTaskManagerLocation()); + assertEquals(c5.getLocation(), s8.getTaskManagerLocation()); + assertEquals(c6.getLocation(), s9.getTaskManagerLocation()); + + // check the scheduler's bookkeeping + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); + + // the first assignments are unconstrained, co.-scheduling is constrained + assertEquals(6, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(6, testingSlotProvider.getNumberOfUnconstrainedAssignments()); + + // release some slots, be sure that new available ones come up + s1.releaseSlot(); + s2.releaseSlot(); + s3.releaseSlot(); + s4.releaseSlot(); + s7.releaseSlot(); + s10.releaseSlot(); + s11.releaseSlot(); + s12.releaseSlot(); + assertTrue(testingSlotProvider.getNumberOfAvailableSlots() >= 1); + + LogicalSlot single = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(new JobVertexID(), 0, 1, null)), false, Collections.emptyList()).get(); + assertNotNull(single); + + s1.releaseSlot(); + s2.releaseSlot(); + s3.releaseSlot(); + s5.releaseSlot(); + s6.releaseSlot(); + s7.releaseSlot(); + s8.releaseSlot(); + s9.releaseSlot(); + s11.releaseSlot(); + s12.releaseSlot(); + + assertEquals(5, testingSlotProvider.getNumberOfAvailableSlots()); + + assertEquals(6, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(7, testingSlotProvider.getNumberOfUnconstrainedAssignments()); + } - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); + @Test + public void scheduleWithIntermediateRelease() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID 
jid2 = new JobVertexID(); + JobVertexID jid3 = new JobVertexID(); + JobVertexID jid4 = new JobVertexID(); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); + testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(1); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - SlotSharingGroup sharingGroup = new SlotSharingGroup(); - CoLocationConstraint c1 = new CoLocationConstraint(new CoLocationGroup()); + SlotSharingGroup sharingGroup = new SlotSharingGroup(); + CoLocationConstraint c1 = new CoLocationConstraint(new CoLocationGroup()); - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid1, 0, 1), sharingGroup, c1), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid2, 0, 1), sharingGroup, c1), false, Collections.emptyList()).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid1, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid2, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); - LogicalSlot sSolo = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 1)), false, Collections.emptyList()).get(); + LogicalSlot sSolo = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 1, null)), false, Collections.emptyList()).get(); - ResourceID taskManager = s1.getTaskManagerLocation().getResourceID(); + ResourceID taskManager = s1.getTaskManagerLocation().getResourceID(); - s1.releaseSlot(); - s2.releaseSlot(); - sSolo.releaseSlot(); + s1.releaseSlot(); + s2.releaseSlot(); + sSolo.releaseSlot(); - LogicalSlot sNew = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup, 
c1), false, Collections.emptyList()).get(); - assertEquals(taskManager, sNew.getTaskManagerLocation().getResourceID()); + LogicalSlot sNew = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid3, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + assertEquals(taskManager, sNew.getTaskManagerLocation().getResourceID()); - assertEquals(2, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(2, scheduler.getNumberOfUnconstrainedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + assertEquals(2, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(2, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } @Test - public void scheduleWithReleaseNoResource() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - JobVertexID jid3 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); + public void scheduleWithReleaseNoResource() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + JobVertexID jid3 = new JobVertexID(); - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); + testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(1); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + SlotSharingGroup sharingGroup = new SlotSharingGroup(); + CoLocationConstraint c1 = new CoLocationConstraint(new CoLocationGroup()); - SlotSharingGroup sharingGroup = new SlotSharingGroup(); - CoLocationConstraint c1 = new CoLocationConstraint(new CoLocationGroup()); + LogicalSlot s1 = 
testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid1, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + s1.releaseSlot(); - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid1, 0, 1), sharingGroup, c1), false, Collections.emptyList()).get(); - s1.releaseSlot(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 1, null)), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 2, null)), false, Collections.emptyList()).get(); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 1)), false, Collections.emptyList()).get(); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 2)), false, Collections.emptyList()).get(); - - try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup, c1), false, Collections.emptyList()).get(); - fail("Scheduled even though no resource was available."); - } catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(3, scheduler.getNumberOfUnconstrainedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + try { + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId(), c1), false, Collections.emptyList()).get(); + fail("Scheduled even though no resource was available."); + } catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); } + + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(3, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } 
@Test - public void scheduleMixedCoLocationSlotSharing() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - JobVertexID jid3 = new JobVertexID(); - JobVertexID jid4 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(1)); - scheduler.newInstanceAvailable(getRandomInstance(1)); - scheduler.newInstanceAvailable(getRandomInstance(1)); - scheduler.newInstanceAvailable(getRandomInstance(1)); - - assertEquals(4, scheduler.getNumberOfAvailableSlots()); - - CoLocationGroup grp = new CoLocationGroup(); - CoLocationConstraint clc1 = new CoLocationConstraint(grp); - CoLocationConstraint clc2 = new CoLocationConstraint(grp); - CoLocationConstraint clc3 = new CoLocationConstraint(grp); - CoLocationConstraint clc4 = new CoLocationConstraint(grp); - - SlotSharingGroup shareGroup = new SlotSharingGroup(); - - // first wave - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), shareGroup), false, Collections.emptyList()); - - // second wave - LogicalSlot s21 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid2, 0, 4), shareGroup, clc1), false, Collections.emptyList()).get(); - LogicalSlot s22 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid2, 2, 4), shareGroup, clc2), false, Collections.emptyList()).get(); - LogicalSlot s23 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid2, 1, 4), shareGroup, clc3), false, Collections.emptyList()).get(); - LogicalSlot s24 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid2, 3, 4), shareGroup, clc4), 
false, Collections.emptyList()).get(); - - // third wave - LogicalSlot s31 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid3, 1, 4), shareGroup, clc2), false, Collections.emptyList()).get(); - LogicalSlot s32 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid3, 2, 4), shareGroup, clc3), false, Collections.emptyList()).get(); - LogicalSlot s33 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid3, 3, 4), shareGroup, clc4), false, Collections.emptyList()).get(); - LogicalSlot s34 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertex(jid3, 0, 4), shareGroup, clc1), false, Collections.emptyList()).get(); - - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4), shareGroup), false, Collections.emptyList()); - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4), shareGroup), false, Collections.emptyList()); - - assertEquals(s21.getTaskManagerLocation(), s34.getTaskManagerLocation()); - assertEquals(s22.getTaskManagerLocation(), s31.getTaskManagerLocation()); - assertEquals(s23.getTaskManagerLocation(), s32.getTaskManagerLocation()); - assertEquals(s24.getTaskManagerLocation(), s33.getTaskManagerLocation()); - - assertEquals(4, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(12, scheduler.getNumberOfUnconstrainedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + public void scheduleMixedCoLocationSlotSharing() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + JobVertexID jid3 = new JobVertexID(); + JobVertexID jid4 = new JobVertexID(); + + testingSlotProvider.addTaskManager(1); + 
testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(1); + + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlots()); + + CoLocationGroup grp = new CoLocationGroup(); + CoLocationConstraint clc1 = new CoLocationConstraint(grp); + CoLocationConstraint clc2 = new CoLocationConstraint(grp); + CoLocationConstraint clc3 = new CoLocationConstraint(grp); + CoLocationConstraint clc4 = new CoLocationConstraint(grp); + + SlotSharingGroup shareGroup = new SlotSharingGroup(); + + // first wave + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + + // second wave + LogicalSlot s21 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid2, 0, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc1), false, Collections.emptyList()).get(); + LogicalSlot s22 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid2, 2, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc2), false, Collections.emptyList()).get(); + LogicalSlot s23 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid2, 1, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc3), false, Collections.emptyList()).get(); + LogicalSlot s24 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid2, 3, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc4), false, 
Collections.emptyList()).get(); + + // third wave + LogicalSlot s31 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid3, 1, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc2), false, Collections.emptyList()).get(); + LogicalSlot s32 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid3, 2, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc3), false, Collections.emptyList()).get(); + LogicalSlot s33 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid3, 3, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc4), false, Collections.emptyList()).get(); + LogicalSlot s34 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertex(jid3, 0, 4, shareGroup), shareGroup.getSlotSharingGroupId(), clc1), false, Collections.emptyList()).get(); + + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4, shareGroup), shareGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + + assertEquals(s21.getTaskManagerLocation(), s34.getTaskManagerLocation()); + assertEquals(s22.getTaskManagerLocation(), s31.getTaskManagerLocation()); + assertEquals(s23.getTaskManagerLocation(), s32.getTaskManagerLocation()); + assertEquals(s24.getTaskManagerLocation(), s33.getTaskManagerLocation()); + + assertEquals(4, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(12, 
testingSlotProvider.getNumberOfUnconstrainedAssignments()); } @Test - public void testGetsNonLocalFromSharingGroupFirst() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - JobVertexID jid3 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); - - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); - - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i1); - - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - - SlotSharingGroup sharingGroup = new SlotSharingGroup(); - - CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint cc1 = new CoLocationConstraint(ccg); - CoLocationConstraint cc2 = new CoLocationConstraint(ccg); - - // schedule something into the shared group so that both instances are in the sharing group - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup), false, Collections.singleton(loc2)).get(); - - // schedule one locally to instance 1 - LogicalSlot s3 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc1), sharingGroup, cc1), false, Collections.singleton(loc1)).get(); - - // schedule with co location constraint (yet unassigned) and a preference for - // instance 1, but it can only get instance 2 - LogicalSlot s4 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc1), sharingGroup, cc2), false, Collections.singleton(loc1)).get(); - - // schedule something into the assigned co-location constraints and check that they override the - // other preferences - LogicalSlot s5 = 
scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid3, 0, 2, loc2), sharingGroup, cc1), false, Collections.singleton(loc2)).get(); - LogicalSlot s6 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid3, 1, 2, loc1), sharingGroup, cc2), false, Collections.singleton(loc1)).get(); - - // check that each slot got three - assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); - assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); - assertEquals(s1.getTaskManagerLocation(), s5.getTaskManagerLocation()); - assertEquals(s2.getTaskManagerLocation(), s6.getTaskManagerLocation()); - - // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - - assertEquals(5, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(1, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments()); - - // release some slots, be sure that new available ones come up - s1.releaseSlot(); - s2.releaseSlot(); - s3.releaseSlot(); - s4.releaseSlot(); - s5.releaseSlot(); - s6.releaseSlot(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + public void testGetsNonLocalFromSharingGroupFirst() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + JobVertexID jid3 = new JobVertexID(); + + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(1); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(1); + + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + + SlotSharingGroup sharingGroup = new SlotSharingGroup(); + + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint cc1 = new CoLocationConstraint(ccg); + CoLocationConstraint cc2 = new CoLocationConstraint(ccg); + + // schedule something into the shared group so that both instances are in the 
sharing group + LogicalSlot s1 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc2)).get(); + + // schedule one locally to instance 1 + LogicalSlot s3 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc1)).get(); + + // schedule with co location constraint (yet unassigned) and a preference for + // instance 1, but it can only get instance 2 + LogicalSlot s4 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc1)).get(); + + // schedule something into the assigned co-location constraints and check that they override the + // other preferences + LogicalSlot s5 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid3, 0, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc2)).get(); + LogicalSlot s6 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid3, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc1)).get(); + + // check that each slot got three + assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); + assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); + assertEquals(s1.getTaskManagerLocation(), s5.getTaskManagerLocation()); + assertEquals(s2.getTaskManagerLocation(), s6.getTaskManagerLocation()); + + // check the scheduler's bookkeeping + 
assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); + + assertEquals(5, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertTrue(1 == testingSlotProvider.getNumberOfNonLocalizedAssignments() || 1 == testingSlotProvider.getNumberOfHostLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); + + // release some slots, be sure that new available ones come up + s1.releaseSlot(); + s2.releaseSlot(); + s3.releaseSlot(); + s4.releaseSlot(); + s5.releaseSlot(); + s6.releaseSlot(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); } @Test - public void testSlotReleasedInBetween() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); + public void testSlotReleasedInBetween() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(1); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(1); - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i1); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + SlotSharingGroup sharingGroup = new SlotSharingGroup(); - SlotSharingGroup sharingGroup = new SlotSharingGroup(); + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint cc1 = new CoLocationConstraint(ccg); + CoLocationConstraint cc2 = new CoLocationConstraint(ccg); - CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint cc1 = new CoLocationConstraint(ccg); - CoLocationConstraint cc2 = new CoLocationConstraint(ccg); + LogicalSlot s1 = 
testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc2)).get(); - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup, cc1), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup, cc2), false, Collections.singleton(loc2)).get(); + s1.releaseSlot(); + s2.releaseSlot(); - s1.releaseSlot(); - s2.releaseSlot(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); + LogicalSlot s3 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc2)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc1)).get(); - LogicalSlot s3 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc2), sharingGroup, cc1), false, Collections.singleton(loc2)).get(); - LogicalSlot s4 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc1), sharingGroup, cc2), false, Collections.singleton(loc1)).get(); + // still preserves the previous instance mapping) + assertEquals(loc1, s3.getTaskManagerLocation()); + 
assertEquals(loc2, s4.getTaskManagerLocation()); - // still preserves the previous instance mapping) - assertEquals(i1.getTaskManagerLocation(), s3.getTaskManagerLocation()); - assertEquals(i2.getTaskManagerLocation(), s4.getTaskManagerLocation()); + s3.releaseSlot(); + s4.releaseSlot(); - s3.releaseSlot(); - s4.releaseSlot(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - - assertEquals(4, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + assertEquals(4, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } @Test - public void testSlotReleasedInBetweenAndNoNewLocal() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - JobVertexID jidx = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); - - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); - - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i1); + public void testSlotReleasedInBetweenAndNoNewLocal() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + JobVertexID jidx = new JobVertexID(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(1); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(1); - SlotSharingGroup sharingGroup = new SlotSharingGroup(); + assertEquals(2, 
testingSlotProvider.getNumberOfAvailableSlots()); - CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint cc1 = new CoLocationConstraint(ccg); - CoLocationConstraint cc2 = new CoLocationConstraint(ccg); + SlotSharingGroup sharingGroup = new SlotSharingGroup(); - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup, cc1), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup, cc2), false, Collections.singleton(loc2)).get(); + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint cc1 = new CoLocationConstraint(ccg); + CoLocationConstraint cc2 = new CoLocationConstraint(ccg); - s1.releaseSlot(); - s2.releaseSlot(); + LogicalSlot s1 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc2)).get(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); + s1.releaseSlot(); + s2.releaseSlot(); - LogicalSlot sa = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jidx, 0, 2)), false, Collections.emptyList()).get(); - LogicalSlot sb = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jidx, 1, 2)), false, Collections.emptyList()).get(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - try { - scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc2), sharingGroup, cc1), false, 
Collections.singleton(loc2)).get(); - fail("should not be able to find a resource"); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - catch (Exception e) { - fail("wrong exception"); - } + LogicalSlot sa = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jidx, 0, 2, null)), false, Collections.emptyList()).get(); + LogicalSlot sb = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jidx, 1, 2, null)), false, Collections.emptyList()).get(); - sa.releaseSlot(); - sb.releaseSlot(); - - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - - assertEquals(2, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(2, scheduler.getNumberOfUnconstrainedAssignments()); + try { + testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc2)).get(); + fail("should not be able to find a resource"); + } + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + fail("wrong exception"); } - } - - @Test - public void testScheduleOutOfOrder() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); - - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i1); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + sa.releaseSlot(); + sb.releaseSlot(); - SlotSharingGroup sharingGroup = new SlotSharingGroup(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - 
CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint cc1 = new CoLocationConstraint(ccg); - CoLocationConstraint cc2 = new CoLocationConstraint(ccg); - - // schedule something from the second job vertex id before the first is filled, - // and give locality preferences that hint at using the same shared slot for both - // co location constraints (which we seek to prevent) - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup, cc1), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc1), sharingGroup, cc2), false, Collections.singleton(loc1)).get(); - - LogicalSlot s3 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc1), sharingGroup, cc1), false, Collections.singleton(loc1)).get(); - LogicalSlot s4 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc1), sharingGroup, cc2), false, Collections.singleton(loc1)).get(); - - // check that each slot got three - assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); - assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); - - // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - - assertEquals(3, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(1, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments()); - - // release some slots, be sure that new available ones come up - s1.releaseSlot(); - s2.releaseSlot(); - s3.releaseSlot(); - s4.releaseSlot(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, 
sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + assertEquals(2, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(2, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } @Test - public void nonColocationFollowsCoLocation() { - try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); + public void testScheduleOutOfOrder() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(1); + + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + + SlotSharingGroup sharingGroup = new SlotSharingGroup(); + + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint cc1 = new CoLocationConstraint(ccg); + CoLocationConstraint cc2 = new CoLocationConstraint(ccg); + + // schedule something from the second job vertex id before the first is filled, + // and give locality preferences that hint at using the same shared slot for both + // co location constraints (which we seek to prevent) + LogicalSlot s1 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc1)).get(); + + LogicalSlot s3 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), 
false, Collections.singleton(loc1)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc1)).get(); + + // check that each slot got three + assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); + assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); + + // check the testingSlotProvider's bookkeeping + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); + + assertEquals(3, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertTrue(1 == testingSlotProvider.getNumberOfNonLocalizedAssignments() || 1 == testingSlotProvider.getNumberOfHostLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); + + // release some slots, be sure that new available ones come up + s1.releaseSlot(); + s2.releaseSlot(); + s3.releaseSlot(); + s4.releaseSlot(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + } - Instance i1 = getRandomInstance(1); - Instance i2 = getRandomInstance(1); + @Test + public void nonColocationFollowsCoLocation() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(1); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(1); - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i1); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - assertEquals(2, 
scheduler.getNumberOfAvailableSlots()); - - SlotSharingGroup sharingGroup = new SlotSharingGroup(); + SlotSharingGroup sharingGroup = new SlotSharingGroup(); - CoLocationGroup ccg = new CoLocationGroup(); - CoLocationConstraint cc1 = new CoLocationConstraint(ccg); - CoLocationConstraint cc2 = new CoLocationConstraint(ccg); + CoLocationGroup ccg = new CoLocationGroup(); + CoLocationConstraint cc1 = new CoLocationConstraint(ccg); + CoLocationConstraint cc2 = new CoLocationConstraint(ccg); - LogicalSlot s1 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup, cc1), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup, cc2), false, Collections.emptyList()).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId(), cc1), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId(), cc2), false, Collections.singleton(loc2)).get(); - LogicalSlot s3 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc1), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot( - new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc1), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot( + new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, 
Collections.singleton(loc1)).get(); - // check that each slot got two - assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); - assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); + // check that each slot got two + assertEquals(s1.getTaskManagerLocation(), s3.getTaskManagerLocation()); + assertEquals(s2.getTaskManagerLocation(), s4.getTaskManagerLocation()); - s1.releaseSlot(); - s2.releaseSlot(); - s3.releaseSlot(); - s4.releaseSlot(); + s1.releaseSlot(); + s2.releaseSlot(); + s3.releaseSlot(); + s4.releaseSlot(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); + assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerIsolatedTasksTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerIsolatedTasksTest.java index 2ece70f6f8ef1..ff166a853e50a 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerIsolatedTasksTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerIsolatedTasksTest.java @@ -20,11 +20,13 @@ import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.LogicalSlot; +import 
org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.runtime.testingUtils.TestingUtils; -import org.apache.flink.util.TestLogger; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.util.ArrayList; import java.util.Arrays; @@ -40,7 +42,6 @@ import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.areAllDistinct; import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getDummyTask; -import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance; import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getTestVertex; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -50,355 +51,257 @@ /** * Tests for the {@link Scheduler} when scheduling individual tasks. */ -public class SchedulerIsolatedTasksTest extends TestLogger { +@RunWith(Parameterized.class) +public class SchedulerIsolatedTasksTest extends SchedulerTestBase { - @Test - public void testAddAndRemoveInstance() { - try { - Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - Instance i3 = getRandomInstance(2); - - assertEquals(0, scheduler.getNumberOfAvailableInstances()); - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - scheduler.newInstanceAvailable(i1); - assertEquals(1, scheduler.getNumberOfAvailableInstances()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - scheduler.newInstanceAvailable(i2); - assertEquals(2, scheduler.getNumberOfAvailableInstances()); - assertEquals(4, scheduler.getNumberOfAvailableSlots()); - scheduler.newInstanceAvailable(i3); - assertEquals(3, scheduler.getNumberOfAvailableInstances()); - assertEquals(6, scheduler.getNumberOfAvailableSlots()); - - // cannot add available instance 
again - try { - scheduler.newInstanceAvailable(i2); - fail("Scheduler accepted instance twice"); - } - catch (IllegalArgumentException e) { - // bueno! - } - - // some instances die - assertEquals(3, scheduler.getNumberOfAvailableInstances()); - assertEquals(6, scheduler.getNumberOfAvailableSlots()); - scheduler.instanceDied(i2); - assertEquals(2, scheduler.getNumberOfAvailableInstances()); - assertEquals(4, scheduler.getNumberOfAvailableSlots()); - - // try to add a dead instance - try { - scheduler.newInstanceAvailable(i2); - fail("Scheduler accepted dead instance"); - } - catch (IllegalArgumentException e) { - // stimmt - - } - - scheduler.instanceDied(i1); - assertEquals(1, scheduler.getNumberOfAvailableInstances()); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - scheduler.instanceDied(i3); - assertEquals(0, scheduler.getNumberOfAvailableInstances()); - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - - assertFalse(i1.isAlive()); - assertFalse(i2.isAlive()); - assertFalse(i3.isAlive()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + public SchedulerIsolatedTasksTest(SchedulerType schedulerType) { + super(schedulerType); } @Test - public void testScheduleImmediately() { + public void testScheduleImmediately() throws Exception { + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); + + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(1); + testingSlotProvider.addTaskManager(2); + assertEquals(5, testingSlotProvider.getNumberOfAvailableSlots()); + + // schedule something into all slots + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + 
LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + + // the slots should all be different + assertTrue(areAllDistinct(s1, s2, s3, s4, s5)); + try { - Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - assertEquals(0, scheduler.getNumberOfAvailableSlots()); - - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(1)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - assertEquals(5, scheduler.getNumberOfAvailableSlots()); - - // schedule something into all slots - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - LogicalSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - - // the slots should all be different - assertTrue(areAllDistinct(s1, s2, s3, s4, s5)); - - try { - scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - fail("Scheduler accepted scheduling request without available resource."); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - - // release some slots again - s3.releaseSlot(); - s4.releaseSlot(); - assertEquals(2, scheduler.getNumberOfAvailableSlots()); - - // now we can schedule some more slots - LogicalSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, 
Collections.emptyList()).get(); - LogicalSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - - assertTrue(areAllDistinct(s1, s2, s3, s4, s5, s6, s7)); - - // release all - - s1.releaseSlot(); - s2.releaseSlot(); - s5.releaseSlot(); - s6.releaseSlot(); - s7.releaseSlot(); - - assertEquals(5, scheduler.getNumberOfAvailableSlots()); - - // check that slots that are released twice (accidentally) do not mess things up - - s1.releaseSlot(); - s2.releaseSlot(); - s5.releaseSlot(); - s6.releaseSlot(); - s7.releaseSlot(); - - assertEquals(5, scheduler.getNumberOfAvailableSlots()); + testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + fail("Scheduler accepted scheduling request without available resource."); } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); } + + // release some slots again + s3.releaseSlot(); + s4.releaseSlot(); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); + + // now we can schedule some more slots + LogicalSlot s6 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + LogicalSlot s7 = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + + assertTrue(areAllDistinct(s1, s2, s3, s4, s5, s6, s7)); + + // release all + + s1.releaseSlot(); + s2.releaseSlot(); + s5.releaseSlot(); + s6.releaseSlot(); + s7.releaseSlot(); + + assertEquals(5, testingSlotProvider.getNumberOfAvailableSlots()); + + // check that slots that are released twice (accidentally) do not mess things up + + s1.releaseSlot(); + s2.releaseSlot(); + s5.releaseSlot(); + s6.releaseSlot(); + s7.releaseSlot(); + + assertEquals(5, testingSlotProvider.getNumberOfAvailableSlots()); } @Test - public void testScheduleQueueing() { + public 
void testScheduleQueueing() throws Exception { final int NUM_INSTANCES = 50; final int NUM_SLOTS_PER_INSTANCE = 3; final int NUM_TASKS_TO_SCHEDULE = 2000; - try { - // note: since this test asynchronously releases slots, the executor needs release workers. - // doing the release call synchronous can lead to a deadlock - Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); + // note: since this test asynchronously releases slots, the executor needs release workers. + // doing the release call synchronous can lead to a deadlock - for (int i = 0; i < NUM_INSTANCES; i++) { - scheduler.newInstanceAvailable(getRandomInstance((int) (Math.random() * NUM_SLOTS_PER_INSTANCE) + 1)); - } + for (int i = 0; i < NUM_INSTANCES; i++) { + testingSlotProvider.addTaskManager((int) (Math.random() * NUM_SLOTS_PER_INSTANCE) + 1); + } - assertEquals(NUM_INSTANCES, scheduler.getNumberOfAvailableInstances()); - final int totalSlots = scheduler.getNumberOfAvailableSlots(); + final int totalSlots = testingSlotProvider.getNumberOfAvailableSlots(); - // all slots we ever got. - List> allAllocatedSlots = new ArrayList<>(); + // all slots we ever got. 
+ List> allAllocatedSlots = new ArrayList<>(); - // slots that need to be released - final Set toRelease = new HashSet<>(); + // slots that need to be released + final Set toRelease = new HashSet<>(); - // flag to track errors in the concurrent thread - final AtomicBoolean errored = new AtomicBoolean(false); + // flag to track errors in the concurrent thread + final AtomicBoolean errored = new AtomicBoolean(false); - // thread to asynchronously release slots - Runnable disposer = new Runnable() { + // thread to asynchronously release slots + Runnable disposer = new Runnable() { - @Override - public void run() { - try { - int recycled = 0; - while (recycled < NUM_TASKS_TO_SCHEDULE) { - synchronized (toRelease) { - while (toRelease.isEmpty()) { - toRelease.wait(); - } + @Override + public void run() { + try { + int recycled = 0; + while (recycled < NUM_TASKS_TO_SCHEDULE) { + synchronized (toRelease) { + while (toRelease.isEmpty()) { + toRelease.wait(); + } - Iterator iter = toRelease.iterator(); - LogicalSlot next = iter.next(); - iter.remove(); + Iterator iter = toRelease.iterator(); + LogicalSlot next = iter.next(); + iter.remove(); - next.releaseSlot(); - recycled++; - } + next.releaseSlot(); + recycled++; } - } catch (Throwable t) { - errored.set(true); } + } catch (Throwable t) { + errored.set(true); } - }; - - Thread disposeThread = new Thread(disposer); - disposeThread.start(); - - for (int i = 0; i < NUM_TASKS_TO_SCHEDULE; i++) { - CompletableFuture future = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), true, Collections.emptyList()); - future.thenAcceptAsync( - (LogicalSlot slot) -> { - synchronized (toRelease) { - toRelease.add(slot); - toRelease.notifyAll(); - } - }, - TestingUtils.defaultExecutionContext()); - allAllocatedSlots.add(future); } + }; - disposeThread.join(); + Thread disposeThread = new Thread(disposer); + disposeThread.start(); - assertFalse("The slot releasing thread caused an error.", errored.get()); - - List slotsAfter = new 
ArrayList<>(); - for (CompletableFuture future : allAllocatedSlots) { - slotsAfter.add(future.get()); - } + for (int i = 0; i < NUM_TASKS_TO_SCHEDULE; i++) { + CompletableFuture future = testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), true, Collections.emptyList()); + future.thenAcceptAsync( + (LogicalSlot slot) -> { + synchronized (toRelease) { + toRelease.add(slot); + toRelease.notifyAll(); + } + }, + TestingUtils.defaultExecutionContext()); + allAllocatedSlots.add(future); + } - assertEquals("All instances should have available slots.", NUM_INSTANCES, - scheduler.getNumberOfInstancesWithAvailableSlots()); + disposeThread.join(); - // the slots should all be different - assertTrue(areAllDistinct(slotsAfter.toArray())); + assertFalse("The slot releasing thread caused an error.", errored.get()); - assertEquals("All slots should be available.", totalSlots, - scheduler.getNumberOfAvailableSlots()); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + List slotsAfter = new ArrayList<>(); + for (CompletableFuture future : allAllocatedSlots) { + slotsAfter.add(future.get()); } + + // the slots should all be different + assertTrue(areAllDistinct(slotsAfter.toArray())); + + assertEquals("All slots should be available.", totalSlots, + testingSlotProvider.getNumberOfAvailableSlots()); } @Test - public void testScheduleWithDyingInstances() { - try { - Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - Instance i3 = getRandomInstance(1); - - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i3); - - List slots = new ArrayList<>(); - slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); - slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); - 
slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); - slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); - slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); - - i2.markDead(); - - for (LogicalSlot slot : slots) { - if (slot.getTaskManagerLocation().getResourceID().equals(i2.getTaskManagerID())) { - assertFalse(slot.isAlive()); - } else { - assertTrue(slot.isAlive()); - } - - slot.releaseSlot(); - } - - assertEquals(3, scheduler.getNumberOfAvailableSlots()); - - i1.markDead(); - i3.markDead(); - - // cannot get another slot, since all instances are dead - try { - scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); - fail("Scheduler served a slot from a dead instance"); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - catch (Exception e) { - fail("Wrong exception type."); + public void testScheduleWithDyingInstances() throws Exception { + final TaskManagerLocation taskManagerLocation1 = testingSlotProvider.addTaskManager(2); + final TaskManagerLocation taskManagerLocation2 = testingSlotProvider.addTaskManager(2); + final TaskManagerLocation taskManagerLocation3 = testingSlotProvider.addTaskManager(1); + + List slots = new ArrayList<>(); + slots.add(testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); + slots.add(testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); + slots.add(testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); + slots.add(testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get()); + slots.add(testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, 
Collections.emptyList()).get()); + + testingSlotProvider.releaseTaskManager(taskManagerLocation2.getResourceID()); + + for (LogicalSlot slot : slots) { + if (slot.getTaskManagerLocation().getResourceID().equals(taskManagerLocation2.getResourceID())) { + assertFalse(slot.isAlive()); + } else { + assertTrue(slot.isAlive()); } - - // now the latest, the scheduler should have noticed (through the lazy mechanisms) - // that all instances have vanished - assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots()); - assertEquals(0, scheduler.getNumberOfAvailableSlots()); + + slot.releaseSlot(); + } + + assertEquals(3, testingSlotProvider.getNumberOfAvailableSlots()); + + testingSlotProvider.releaseTaskManager(taskManagerLocation1.getResourceID()); + testingSlotProvider.releaseTaskManager(taskManagerLocation3.getResourceID()); + + // cannot get another slot, since all instances are dead + try { + testingSlotProvider.allocateSlot(new ScheduledUnit(getDummyTask()), false, Collections.emptyList()).get(); + fail("Scheduler served a slot from a dead instance"); + } + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + fail("Wrong exception type."); } + + // now the latest, the scheduler should have noticed (through the lazy mechanisms) + // that all instances have vanished + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); } @Test - public void testSchedulingLocation() { - try { - Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - Instance i3 = getRandomInstance(2); - - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); - scheduler.newInstanceAvailable(i3); - - // schedule something on an arbitrary instance - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(new Instance[0])), false, 
Collections.emptyList()).get(); - - // figure out how we use the location hints - ResourceID firstResourceId = s1.getTaskManagerLocation().getResourceID(); - - List instances = Arrays.asList(i1, i2, i3); - - int index = 0; - for (; index < instances.size(); index++) { - if (Objects.equals(instances.get(index).getTaskManagerID(), firstResourceId)) { - break; - } - } + public void testSchedulingLocation() throws Exception { + final TaskManagerLocation taskManagerLocation1 = testingSlotProvider.addTaskManager(2); + final TaskManagerLocation taskManagerLocation2 = testingSlotProvider.addTaskManager(2); + final TaskManagerLocation taskManagerLocation3 = testingSlotProvider.addTaskManager(2); - Instance first = instances.get(index); - Instance second = instances.get((index + 1) % instances.size()); - Instance third = instances.get((index + 2) % instances.size()); - - // something that needs to go to the first instance again - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(s1.getTaskManagerLocation())), false, Collections.singleton(s1.getTaskManagerLocation())).get(); - assertEquals(first.getTaskManagerID(), s2.getTaskManagerLocation().getResourceID()); - - // first or second --> second, because first is full - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, second)), false, Arrays.asList(first.getTaskManagerLocation(), second.getTaskManagerLocation())).get(); - assertEquals(second.getTaskManagerID(), s3.getTaskManagerLocation().getResourceID()); - - // first or third --> third (because first is full) - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first.getTaskManagerLocation(), third.getTaskManagerLocation())).get(); - LogicalSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first.getTaskManagerLocation(), third.getTaskManagerLocation())).get(); - assertEquals(third.getTaskManagerID(), 
s4.getTaskManagerLocation().getResourceID()); - assertEquals(third.getTaskManagerID(), s5.getTaskManagerLocation().getResourceID()); - - // first or third --> second, because all others are full - LogicalSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first.getTaskManagerLocation(), third.getTaskManagerLocation())).get(); - assertEquals(second.getTaskManagerID(), s6.getTaskManagerLocation().getResourceID()); - - // release something on the first and second instance - s2.releaseSlot(); - s6.releaseSlot(); - - LogicalSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first.getTaskManagerLocation(), third.getTaskManagerLocation())).get(); - assertEquals(first.getTaskManagerID(), s7.getTaskManagerLocation().getResourceID()); - - assertEquals(1, scheduler.getNumberOfUnconstrainedAssignments()); - assertEquals(1, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(5, scheduler.getNumberOfLocalizedAssignments()); - } - catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + // schedule something on an arbitrary instance + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(new Instance[0])), false, Collections.emptyList()).get(); + + // figure out how we use the location hints + ResourceID firstResourceId = s1.getTaskManagerLocation().getResourceID(); + + List taskManagerLocations = Arrays.asList( + taskManagerLocation1, + taskManagerLocation2, + taskManagerLocation3); + + int index = 0; + for (; index < taskManagerLocations.size(); index++) { + if (Objects.equals(taskManagerLocations.get(index).getResourceID(), firstResourceId)) { + break; + } } + + TaskManagerLocation first = taskManagerLocations.get(index); + TaskManagerLocation second = taskManagerLocations.get((index + 1) % taskManagerLocations.size()); + TaskManagerLocation third = taskManagerLocations.get((index + 2) % taskManagerLocations.size()); + + // 
something that needs to go to the first instance again + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(s1.getTaskManagerLocation())), false, Collections.singleton(s1.getTaskManagerLocation())).get(); + assertEquals(first.getResourceID(), s2.getTaskManagerLocation().getResourceID()); + + // first or second --> second, because first is full + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(first, second)), false, Arrays.asList(first, second)).get(); + assertEquals(second.getResourceID(), s3.getTaskManagerLocation().getResourceID()); + + // first or third --> third (because first is full) + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first, third)).get(); + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first, third)).get(); + assertEquals(third.getResourceID(), s4.getTaskManagerLocation().getResourceID()); + assertEquals(third.getResourceID(), s5.getTaskManagerLocation().getResourceID()); + + // first or third --> second, because all others are full + LogicalSlot s6 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first, third)).get(); + assertEquals(second.getResourceID(), s6.getTaskManagerLocation().getResourceID()); + + // release something on the first and second instance + s2.releaseSlot(); + s6.releaseSlot(); + + LogicalSlot s7 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(first, third)), false, Arrays.asList(first, third)).get(); + assertEquals(first.getResourceID(), s7.getTaskManagerLocation().getResourceID()); + + assertEquals(1, testingSlotProvider.getNumberOfUnconstrainedAssignments()); + assertTrue(1 == testingSlotProvider.getNumberOfNonLocalizedAssignments() || 1 == testingSlotProvider.getNumberOfHostLocalizedAssignments()); + assertEquals(5, 
testingSlotProvider.getNumberOfLocalizedAssignments()); } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerSlotSharingTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerSlotSharingTest.java index 41a7f026dd61f..025c79579b215 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerSlotSharingTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerSlotSharingTest.java @@ -18,14 +18,15 @@ package org.apache.flink.runtime.jobmanager.scheduler; -import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.clusterframework.types.ResourceID; +import org.apache.flink.runtime.jobmaster.LogicalSlot; import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.runtime.testingUtils.TestingUtils; -import org.apache.flink.util.TestLogger; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.util.Collections; import java.util.Random; @@ -49,7 +50,12 @@ /** * Tests for the scheduler when scheduling tasks in slot sharing groups. 
*/ -public class SchedulerSlotSharingTest extends TestLogger { +@RunWith(Parameterized.class) +public class SchedulerSlotSharingTest extends SchedulerTestBase { + + public SchedulerSlotSharingTest(SchedulerType schedulerType) { + super(schedulerType); + } @Test public void scheduleSingleVertexType() { @@ -57,18 +63,15 @@ public void scheduleSingleVertexType() { JobVertexID jid1 = new JobVertexID(); SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); + + final ResourceID tm1ResourceId = testingSlotProvider.addTaskManager(2).getResourceID(); + testingSlotProvider.addTaskManager(2); // schedule 4 tasks from the first vertex group - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 8), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 8), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 8), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 8), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, 
Collections.emptyList()).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1); assertNotNull(s2); @@ -79,7 +82,7 @@ public void scheduleSingleVertexType() { // we cannot schedule another task from the first vertex group try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8), sharingGroup), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -93,7 +96,7 @@ public void scheduleSingleVertexType() { s3.releaseSlot(); // allocate another slot from that group - LogicalSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s5); // release all old slots @@ -101,9 +104,9 @@ public void scheduleSingleVertexType() { s2.releaseSlot(); s4.releaseSlot(); - LogicalSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 5, 8), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 6, 8), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s8 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 7, 8), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s6 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 5, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s7 = 
testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 6, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s8 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 7, 8, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s6); assertNotNull(s7); @@ -111,10 +114,10 @@ public void scheduleSingleVertexType() { // make sure we have two slots on the first instance, and two on the second int c = 0; - c += (s5.getTaskManagerLocation().equals(i1.getTaskManagerLocation())) ? 1 : -1; - c += (s6.getTaskManagerLocation().equals(i1.getTaskManagerLocation())) ? 1 : -1; - c += (s7.getTaskManagerLocation().equals(i1.getTaskManagerLocation())) ? 1 : -1; - c += (s8.getTaskManagerLocation().equals(i1.getTaskManagerLocation())) ? 1 : -1; + c += (s5.getTaskManagerLocation().getResourceID().equals(tm1ResourceId)) ? 1 : -1; + c += (s6.getTaskManagerLocation().getResourceID().equals(tm1ResourceId)) ? 1 : -1; + c += (s7.getTaskManagerLocation().getResourceID().equals(tm1ResourceId)) ? 1 : -1; + c += (s8.getTaskManagerLocation().getResourceID().equals(tm1ResourceId)) ? 
1 : -1; assertEquals(0, c); // release all @@ -124,12 +127,12 @@ public void scheduleSingleVertexType() { s8.releaseSlot(); // test that everything is released - assertEquals(4, scheduler.getNumberOfAvailableSlots()); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlots()); // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(8, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(8, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -138,123 +141,116 @@ public void scheduleSingleVertexType() { } @Test - public void allocateSlotWithSharing() { + public void allocateSlotWithSharing() throws Exception { + JobVertexID jid1 = new JobVertexID(); + JobVertexID jid2 = new JobVertexID(); + + SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); + + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(2); + + // schedule 4 tasks from the first vertex group + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, 
Collections.emptyList()).get(); + + assertNotNull(s1); + assertNotNull(s2); + assertNotNull(s3); + assertNotNull(s4); + + assertTrue(areAllDistinct(s1, s2, s3, s4)); + + // we cannot schedule another task from the first vertex group try { - JobVertexID jid1 = new JobVertexID(); - JobVertexID jid2 = new JobVertexID(); - - SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - - // schedule 4 tasks from the first vertex group - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 5), sharingGroup), false, Collections.emptyList()).get(); - - assertNotNull(s1); - assertNotNull(s2); - assertNotNull(s3); - assertNotNull(s4); - - assertTrue(areAllDistinct(s1, s2, s3, s4)); - - // we cannot schedule another task from the first vertex group - try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false, Collections.emptyList()).get(); - fail("Scheduler accepted too many tasks at the same time"); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - catch (Exception e) { - fail("Wrong exception."); - } - - // schedule some tasks from the second ID group - LogicalSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_2 = scheduler.allocateSlot(new 
ScheduledUnit(getTestVertex(jid2, 1, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 5), sharingGroup), false, Collections.emptyList()).get(); - - assertNotNull(s1_2); - assertNotNull(s2_2); - assertNotNull(s3_2); - assertNotNull(s4_2); - - // we cannot schedule another task from the second vertex group - try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false, Collections.emptyList()).get(); - fail("Scheduler accepted too many tasks at the same time"); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - catch (Exception e) { - fail("Wrong exception."); - } - - // now, we release some vertices (sub-slots) from the first group. - // that should allow us to schedule more vertices from the first group - s1.releaseSlot(); - s4.releaseSlot(); - - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(2, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); - - // we can still not schedule anything from the second group of vertices - try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false, Collections.emptyList()).get(); - fail("Scheduler accepted too many tasks at the same time"); - } - catch (ExecutionException e) { - assertTrue(e.getCause() instanceof NoResourceAvailableException); - } - catch (Exception e) { - fail("Wrong exception."); - } - - // we can schedule something from the first vertex group - LogicalSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false, Collections.emptyList()).get(); - 
assertNotNull(s5); - - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(1, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); - - - // now we release a slot from the second vertex group and schedule another task from that group - s2_2.releaseSlot(); - LogicalSlot s5_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false, Collections.emptyList()).get(); - assertNotNull(s5_2); - - // release all slots - s2.releaseSlot(); - s3.releaseSlot(); - s5.releaseSlot(); - - s1_2.releaseSlot(); - s3_2.releaseSlot(); - s4_2.releaseSlot(); - s5_2.releaseSlot(); - - // test that everything is released - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(4, scheduler.getNumberOfAvailableSlots()); - - // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(10, scheduler.getNumberOfUnconstrainedAssignments()); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + fail("Scheduler accepted too many tasks at the same time"); + } + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); + fail("Wrong exception."); + } + + // schedule some tasks from the second ID group + LogicalSlot s1_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, 
Collections.emptyList()).get(); + LogicalSlot s3_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + + assertNotNull(s1_2); + assertNotNull(s2_2); + assertNotNull(s3_2); + assertNotNull(s4_2); + + // we cannot schedule another task from the second vertex group + try { + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + fail("Scheduler accepted too many tasks at the same time"); + } + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); + } + catch (Exception e) { + fail("Wrong exception."); + } + + // now, we release some vertices (sub-slots) from the first group. 
+ // that should allow us to schedule more vertices from the first group + s1.releaseSlot(); + s4.releaseSlot(); + + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); + + // we can still not schedule anything from the second group of vertices + try { + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + fail("Scheduler accepted too many tasks at the same time"); } + catch (ExecutionException e) { + assertTrue(e.getCause() instanceof NoResourceAvailableException); + } + catch (Exception e) { + fail("Wrong exception."); + } + + // we can schedule something from the first vertex group + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + assertNotNull(s5); + + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(1, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); + + + // now we release a slot from the second vertex group and schedule another task from that group + s2_2.releaseSlot(); + LogicalSlot s5_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + assertNotNull(s5_2); + + // release all slots + s2.releaseSlot(); + s3.releaseSlot(); + s5.releaseSlot(); + + s1_2.releaseSlot(); + s3_2.releaseSlot(); + s4_2.releaseSlot(); + s5_2.releaseSlot(); + + // test that everything is released + assertEquals(0, 
testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlots()); + + // check the scheduler's bookkeeping + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(10, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } @Test @@ -264,56 +260,55 @@ public void allocateSlotWithIntermediateTotallyEmptySharingGroup() { JobVertexID jid2 = new JobVertexID(); SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - + + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(2); + // schedule 4 tasks from the first vertex group - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, sharingGroup), 
sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); s1.releaseSlot(); s2.releaseSlot(); s3.releaseSlot(); s4.releaseSlot(); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(0, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); // schedule some tasks from the second ID group - LogicalSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + 
LogicalSlot s1_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); s1_2.releaseSlot(); s2_2.releaseSlot(); s3_2.releaseSlot(); s4_2.releaseSlot(); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(0, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); // test that everything is released - assertEquals(4, scheduler.getNumberOfAvailableSlots()); + assertEquals(4, 
testingSlotProvider.getNumberOfAvailableSlots()); // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(8, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(8, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -329,16 +324,15 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { JobVertexID jid3 = new JobVertexID(); SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(2)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - + + testingSlotProvider.addTaskManager(2); + testingSlotProvider.addTaskManager(2); + // schedule 4 tasks from the first vertex group - LogicalSlot s1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), 
false, Collections.emptyList()).get(); + LogicalSlot s3_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_1); assertNotNull(s2_1); @@ -348,10 +342,10 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { assertTrue(areAllDistinct(s1_1, s2_1, s3_1, s4_1)); // schedule 4 tasks from the second vertex group - LogicalSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 7), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 7), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 7), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 7), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); 
assertNotNull(s1_2); assertNotNull(s2_2); @@ -361,10 +355,10 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { assertTrue(areAllDistinct(s1_2, s2_2, s3_2, s4_2)); // schedule 4 tasks from the third vertex group - LogicalSlot s1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_3); assertNotNull(s2_3); @@ -376,7 +370,7 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { // we cannot schedule another task from the second vertex group try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5, sharingGroup), 
sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -392,9 +386,9 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { s3_2.releaseSlot(); s4_2.releaseSlot(); - LogicalSlot s5_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 7), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s6_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 6, 7), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s7_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 7, 7), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s5_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s6_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 6, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s7_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 7, 7, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s5_2); assertNotNull(s6_2); @@ -411,7 +405,7 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { s7_2.releaseSlot(); // test that everything is released - assertEquals(0, scheduler.getNumberOfAvailableSlots()); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlots()); s1_3.releaseSlot(); s2_3.releaseSlot(); @@ -419,12 +413,12 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup() { s4_3.releaseSlot(); // test that everything is released - assertEquals(4, scheduler.getNumberOfAvailableSlots()); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlots()); // check the scheduler's bookkeeping - assertEquals(0, 
scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(15, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(15, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -440,22 +434,21 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup2() { JobVertexID jid3 = new JobVertexID(); SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(2)); - + + testingSlotProvider.addTaskManager(2); + // schedule 1 tasks from the first vertex group and 2 from the second - LogicalSlot s1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 2), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 2), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 2), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 2, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 2, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 2, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_1); assertNotNull(s2_1); assertNotNull(s2_2); - assertEquals(2, 
sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(1, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(2, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(1, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); // release the two from the second s2_1.releaseSlot(); @@ -463,17 +456,17 @@ public void allocateSlotWithTemporarilyEmptyVertexGroup2() { // this should free one slot so we can allocate one non-shared - LogicalSlot sx = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1)), false, Collections.emptyList()).get(); + LogicalSlot sx = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1, null)), false, Collections.emptyList()).get(); assertNotNull(sx); - assertEquals(1, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1)); - assertEquals(1, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2)); + assertEquals(1, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(0, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid1)); + assertEquals(1, testingSlotProvider.getNumberOfAvailableSlotsForGroup(sharingGroup, jid2)); // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(4, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(4, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { 
e.printStackTrace(); @@ -492,34 +485,33 @@ public void scheduleMixedSharingAndNonSharing() { JobVertexID jidC = new JobVertexID(); SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(3)); - scheduler.newInstanceAvailable(getRandomInstance(2)); - + + testingSlotProvider.addTaskManager(3); + testingSlotProvider.addTaskManager(2); + // schedule some individual vertices - LogicalSlot sA1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidA, 0, 2)), false, Collections.emptyList()).get(); - LogicalSlot sA2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidA, 1, 2)), false, Collections.emptyList()).get(); + LogicalSlot sA2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidA, 1, 2, null)), false, Collections.emptyList()).get(); + LogicalSlot sA1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidA, 0, 2, null)), false, Collections.emptyList()).get(); assertNotNull(sA1); assertNotNull(sA2); // schedule some vertices in the sharing group - LogicalSlot s1_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_0 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s1_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 
1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_0 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_0); assertNotNull(s1_1); assertNotNull(s2_0); assertNotNull(s2_1); // schedule another isolated vertex - LogicalSlot sB1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 1, 3)), false, Collections.emptyList()).get(); + LogicalSlot sB1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 1, 3, null)), false, Collections.emptyList()).get(); assertNotNull(sB1); // should not be able to schedule more vertices try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -530,7 +522,7 @@ public void scheduleMixedSharingAndNonSharing() { } try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -541,7 +533,7 @@ public void scheduleMixedSharingAndNonSharing() { } try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 0, 3)), false, Collections.emptyList()).get(); + 
testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 0, 3, null)), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -552,7 +544,7 @@ public void scheduleMixedSharingAndNonSharing() { } try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 0, 1)), false, Collections.emptyList()).get(); + testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 0, 1, null)), false, Collections.emptyList()).get(); fail("Scheduler accepted too many tasks at the same time"); } catch (ExecutionException e) { @@ -565,8 +557,8 @@ public void scheduleMixedSharingAndNonSharing() { // release some isolated task and check that the sharing group may grow sA1.releaseSlot(); - LogicalSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_2); assertNotNull(s2_2); @@ -575,22 +567,22 @@ public void scheduleMixedSharingAndNonSharing() { s1_1.releaseSlot(); s2_0.releaseSlot(); - assertEquals(1, scheduler.getNumberOfAvailableSlots()); + assertEquals(1, testingSlotProvider.getNumberOfAvailableSlots()); // schedule one more no-shared task - LogicalSlot sB0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 0, 3)), false, Collections.emptyList()).get(); + LogicalSlot sB0 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 0, 3, 
null)), false, Collections.emptyList()).get(); assertNotNull(sB0); // release the last of the original shared slots and allocate one more non-shared slot s2_1.releaseSlot(); - LogicalSlot sB2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 2, 3)), false, Collections.emptyList()).get(); + LogicalSlot sB2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidB, 2, 3, null)), false, Collections.emptyList()).get(); assertNotNull(sB2); // release on non-shared and add some shared slots sA2.releaseSlot(); - LogicalSlot s1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s1_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertNotNull(s1_3); assertNotNull(s2_3); @@ -600,8 +592,8 @@ public void scheduleMixedSharingAndNonSharing() { s1_3.releaseSlot(); s2_3.releaseSlot(); - LogicalSlot sC0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 1, 2)), false, Collections.emptyList()).get(); - LogicalSlot sC1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 0, 2)), false, Collections.emptyList()).get(); + LogicalSlot sC0 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 1, 2, null)), false, Collections.emptyList()).get(); + LogicalSlot sC1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jidC, 0, 2, null)), false, Collections.emptyList()).get(); assertNotNull(sC0); assertNotNull(sC1); @@ -613,12 +605,12 @@ public void 
scheduleMixedSharingAndNonSharing() { sC1.releaseSlot(); // test that everything is released - assertEquals(5, scheduler.getNumberOfAvailableSlots()); + assertEquals(5, testingSlotProvider.getNumberOfAvailableSlots()); // check the scheduler's bookkeeping - assertEquals(0, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(15, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(15, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -637,41 +629,34 @@ public void testLocalizedAssignment1() { SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); - + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(2); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(2); // schedule one to each instance - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc2), sharingGroup), false, Collections.singleton(loc2)).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new 
ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc2)).get(); assertNotNull(s1); assertNotNull(s2); - assertEquals(2, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(1, i1.getNumberOfAvailableSlots()); - assertEquals(1, i2.getNumberOfAvailableSlots()); + assertEquals(2, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(loc1, s1.getTaskManagerLocation()); + assertEquals(loc2, s2.getTaskManagerLocation()); // schedule one from the other group to each instance - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc2), sharingGroup), false, Collections.singleton(loc2)).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc2)).get(); assertNotNull(s3); assertNotNull(s4); - assertEquals(2, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(1, i1.getNumberOfAvailableSlots()); - assertEquals(1, i2.getNumberOfAvailableSlots()); + assertEquals(2, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(loc1, s3.getTaskManagerLocation()); + assertEquals(loc2, s4.getTaskManagerLocation()); + assertEquals(2, testingSlotProvider.getNumberOfAvailableSlots()); // check the scheduler's bookkeeping - assertEquals(4, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, 
scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(4, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -690,41 +675,33 @@ public void testLocalizedAssignment2() { SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - Instance i1 = getRandomInstance(2); - Instance i2 = getRandomInstance(2); - - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - TaskManagerLocation loc2 = i2.getTaskManagerLocation(); + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(2); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(2); - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); - - // schedule one to each instance - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); assertNotNull(s1); assertNotNull(s2); - assertEquals(2, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, i1.getNumberOfAvailableSlots()); - assertEquals(2, i2.getNumberOfAvailableSlots()); + assertEquals(2, 
testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(loc1, s1.getTaskManagerLocation()); + assertEquals(loc1, s2.getTaskManagerLocation()); // schedule one from the other group to each instance - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, loc2), sharingGroup), false, Collections.singleton(loc2)).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, loc2), sharingGroup), false, Collections.singleton(loc2)).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc2)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 2, sharingGroup, loc2), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc2)).get(); assertNotNull(s3); assertNotNull(s4); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - assertEquals(0, i1.getNumberOfAvailableSlots()); - assertEquals(0, i2.getNumberOfAvailableSlots()); + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + assertEquals(loc2, s3.getTaskManagerLocation()); + assertEquals(loc2, s4.getTaskManagerLocation()); // check the scheduler's bookkeeping - assertEquals(4, scheduler.getNumberOfLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(4, testingSlotProvider.getNumberOfLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfNonLocalizedAssignments()); + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -743,24 +720,18 @@ public void testLocalizedAssignment3() { SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2); - Instance i1 = 
getRandomInstance(2); - Instance i2 = getRandomInstance(2); + TaskManagerLocation loc1 = testingSlotProvider.addTaskManager(2); + TaskManagerLocation loc2 = testingSlotProvider.addTaskManager(2); - TaskManagerLocation loc1 = i1.getTaskManagerLocation(); - - Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext()); - scheduler.newInstanceAvailable(i1); - scheduler.newInstanceAvailable(i2); - // schedule until the one instance is full - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 4, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 4, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); + LogicalSlot s1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 0, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid1, 1, 2, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 0, 4, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 1, 4, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); // schedule two more with preference of same 
instance --> need to go to other instance - LogicalSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 3, 4, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); - LogicalSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 4, 4, loc1), sharingGroup), false, Collections.singleton(loc1)).get(); + LogicalSlot s5 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 3, 4, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); + LogicalSlot s6 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertexWithLocation(jid2, 4, 4, sharingGroup, loc1), sharingGroup.getSlotSharingGroupId()), false, Collections.singleton(loc1)).get(); assertNotNull(s1); assertNotNull(s2); @@ -769,22 +740,21 @@ public void testLocalizedAssignment3() { assertNotNull(s5); assertNotNull(s6); - assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots()); - - assertEquals(0, i1.getNumberOfAvailableSlots()); - assertEquals(0, i2.getNumberOfAvailableSlots()); - - assertEquals(i1.getTaskManagerLocation(), s1.getTaskManagerLocation()); - assertEquals(i1.getTaskManagerLocation(), s2.getTaskManagerLocation()); - assertEquals(i1.getTaskManagerLocation(), s3.getTaskManagerLocation()); - assertEquals(i1.getTaskManagerLocation(), s4.getTaskManagerLocation()); - assertEquals(i2.getTaskManagerLocation(), s5.getTaskManagerLocation()); - assertEquals(i2.getTaskManagerLocation(), s6.getTaskManagerLocation()); + assertEquals(4, testingSlotProvider.getNumberOfSlots(sharingGroup)); + + assertEquals(loc1, s1.getTaskManagerLocation()); + assertEquals(loc1, s2.getTaskManagerLocation()); + assertEquals(loc1, s3.getTaskManagerLocation()); + assertEquals(loc1, s4.getTaskManagerLocation()); + assertEquals(loc2, s5.getTaskManagerLocation()); + assertEquals(loc2, s6.getTaskManagerLocation()); // check the scheduler's bookkeeping - assertEquals(4, 
scheduler.getNumberOfLocalizedAssignments()); - assertEquals(2, scheduler.getNumberOfNonLocalizedAssignments()); - assertEquals(0, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(4, testingSlotProvider.getNumberOfLocalizedAssignments()); + // Flip-6 supports host localized assignments which happen in this case because all TaskManagerLocations point to the loopback address + assertTrue(2 == testingSlotProvider.getNumberOfNonLocalizedAssignments() || 2 == testingSlotProvider.getNumberOfHostLocalizedAssignments()); + + assertEquals(0, testingSlotProvider.getNumberOfUnconstrainedAssignments()); } catch (Exception e) { e.printStackTrace(); @@ -802,23 +772,22 @@ public void testSequentialAllocateAndRelease() { final SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3, jid4); - final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(4)); - + testingSlotProvider.addTaskManager(4); + // allocate something from group 1 and 2 interleaved with schedule for group 3 - LogicalSlot slot_1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_1_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_1_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - LogicalSlot slot_2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 
4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_2_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_2_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - LogicalSlot slot_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - LogicalSlot slot_1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_1_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_1_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_1_4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); - LogicalSlot slot_2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_2_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_2_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_2_4 = 
testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); // release groups 1 and 2 @@ -834,10 +803,10 @@ public void testSequentialAllocateAndRelease() { // allocate group 4 - LogicalSlot slot_4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot slot_4_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot slot_4_1 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_4_2 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_4_3 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot slot_4_4 = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); // release groups 3 and 4 @@ -859,6 +828,8 @@ public void testConcurrentAllocateAndRelease() { final ExecutorService executor = Executors.newFixedThreadPool(20); try { + testingSlotProvider.addTaskManager(4); + for (int run = 0; run < 50; run++) { final JobVertexID jid1 = new JobVertexID(); final JobVertexID jid2 = new JobVertexID(); @@ 
-866,10 +837,7 @@ public void testConcurrentAllocateAndRelease() { final JobVertexID jid4 = new JobVertexID(); final SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3, jid4); - - final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); - scheduler.newInstanceAvailable(getRandomInstance(4)); - + final AtomicInteger enumerator1 = new AtomicInteger(); final AtomicInteger enumerator2 = new AtomicInteger(); final AtomicBoolean flag3 = new AtomicBoolean(); @@ -883,13 +851,11 @@ public void testConcurrentAllocateAndRelease() { // use atomic integer as a mutable integer reference final AtomicInteger completed = new AtomicInteger(); - final Runnable deploy4 = new Runnable() { @Override public void run() { try { - LogicalSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, enumerator4.getAndIncrement(), 4), sharingGroup), false, Collections.emptyList()).get(); - + LogicalSlot slot = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid4, enumerator4.getAndIncrement(), 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); sleepUninterruptibly(rnd.nextInt(5)); slot.releaseSlot(); @@ -911,8 +877,7 @@ public void run() { public void run() { try { if (flag3.compareAndSet(false, true)) { - LogicalSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup), false, Collections.emptyList()).get(); - + LogicalSlot slot = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); sleepUninterruptibly(5); executor.execute(deploy4); @@ -940,8 +905,8 @@ public void run() { @Override public void run() { try { - LogicalSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, enumerator2.getAndIncrement(), 4), sharingGroup), false, Collections.emptyList()).get(); - + LogicalSlot slot = 
testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid2, enumerator2.getAndIncrement(), 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + // wait a bit till scheduling the successor sleepUninterruptibly(rnd.nextInt(5)); executor.execute(deploy3); @@ -967,8 +932,8 @@ public void run() { @Override public void run() { try { - LogicalSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, enumerator1.getAndIncrement(), 4), sharingGroup), false, Collections.emptyList()).get(); - + LogicalSlot slot = testingSlotProvider.allocateSlot(new ScheduledUnit(getTestVertex(jid1, enumerator1.getAndIncrement(), 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + // wait a bit till scheduling the successor sleepUninterruptibly(rnd.nextInt(5)); executor.execute(deploy2); @@ -1012,14 +977,12 @@ public void run() { assertFalse("Thread failed", failed.get()); - while (scheduler.getNumberOfAvailableSlots() < 4) { + while (testingSlotProvider.getNumberOfAvailableSlots() < 4) { sleepUninterruptibly(5); } - assertEquals(1, scheduler.getNumberOfAvailableInstances()); - assertEquals(1, scheduler.getNumberOfInstancesWithAvailableSlots()); - assertEquals(4, scheduler.getNumberOfAvailableSlots()); - assertEquals(13, scheduler.getNumberOfUnconstrainedAssignments()); + assertEquals(4, testingSlotProvider.getNumberOfAvailableSlots()); + assertEquals(13 * (run + 1), testingSlotProvider.getNumberOfUnconstrainedAssignments()); } } catch (Exception e) { @@ -1042,27 +1005,27 @@ public void testDopIncreases() { scheduler.newInstanceAvailable(getRandomInstance(4)); // schedule one task for the first and second vertex - LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 1), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 1), sharingGroup), false, 
Collections.emptyList()).get(); + LogicalSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 1, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); assertEquals( s1.getTaskManagerLocation(), s2.getTaskManagerLocation() ); assertEquals(3, scheduler.getNumberOfAvailableSlots()); - LogicalSlot s3_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s3_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 1, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s3_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 1, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); s1.releaseSlot(); s2.releaseSlot(); - LogicalSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 2, 5), sharingGroup), false, Collections.emptyList()).get(); - 
LogicalSlot s3_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 3, 5), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4), sharingGroup), false, Collections.emptyList()).get(); - LogicalSlot s4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4), sharingGroup), false, Collections.emptyList()).get(); + LogicalSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 2, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s3_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 3, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); + LogicalSlot s4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); try { - scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 4, 5), sharingGroup), false, Collections.emptyList()).get(); + scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 4, 5, sharingGroup), sharingGroup.getSlotSharingGroupId()), false, Collections.emptyList()).get(); fail("should throw an exception"); } catch (ExecutionException e) { diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTest.java new file mode 100644 index 0000000000000..8fd5f9e306499 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmanager.scheduler; + +import org.apache.flink.runtime.instance.Instance; +import org.apache.flink.runtime.testingUtils.TestingUtils; +import org.apache.flink.util.TestLogger; + +import org.junit.Test; + +import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +public class SchedulerTest extends TestLogger { + + @Test + public void testAddAndRemoveInstance() { + Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); + + Instance i1 = getRandomInstance(2); + Instance i2 = getRandomInstance(2); + Instance i3 = getRandomInstance(2); + + assertEquals(0, scheduler.getNumberOfAvailableInstances()); + assertEquals(0, scheduler.getNumberOfAvailableSlots()); + scheduler.newInstanceAvailable(i1); + assertEquals(1, scheduler.getNumberOfAvailableInstances()); + assertEquals(2, scheduler.getNumberOfAvailableSlots()); + scheduler.newInstanceAvailable(i2); + assertEquals(2, scheduler.getNumberOfAvailableInstances()); + assertEquals(4, scheduler.getNumberOfAvailableSlots()); + scheduler.newInstanceAvailable(i3); + assertEquals(3, 
scheduler.getNumberOfAvailableInstances()); + assertEquals(6, scheduler.getNumberOfAvailableSlots()); + + // cannot add available instance again + try { + scheduler.newInstanceAvailable(i2); + fail("Scheduler accepted instance twice"); + } + catch (IllegalArgumentException e) { + // expected: the instance is already registered + } + + // some instances die + assertEquals(3, scheduler.getNumberOfAvailableInstances()); + assertEquals(6, scheduler.getNumberOfAvailableSlots()); + scheduler.instanceDied(i2); + assertEquals(2, scheduler.getNumberOfAvailableInstances()); + assertEquals(4, scheduler.getNumberOfAvailableSlots()); + + // try to add a dead instance + try { + scheduler.newInstanceAvailable(i2); + fail("Scheduler accepted dead instance"); + } + catch (IllegalArgumentException e) { + // expected: dead instances are rejected + + } + + scheduler.instanceDied(i1); + assertEquals(1, scheduler.getNumberOfAvailableInstances()); + assertEquals(2, scheduler.getNumberOfAvailableSlots()); + scheduler.instanceDied(i3); + assertEquals(0, scheduler.getNumberOfAvailableInstances()); + assertEquals(0, scheduler.getNumberOfAvailableSlots()); + + assertFalse(i1.isAlive()); + assertFalse(i2.isAlive()); + assertFalse(i3.isAlive()); + } +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestBase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestBase.java new file mode 100644 index 0000000000000..d5460eb6106cc --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestBase.java @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmanager.scheduler; + +import org.apache.flink.api.common.JobID; +import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.clusterframework.types.ResourceID; +import org.apache.flink.runtime.clusterframework.types.ResourceProfile; +import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.Instance; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.slotpool.SlotSharingManager; +import org.apache.flink.runtime.jobmaster.slotpool.SlotPool; +import org.apache.flink.runtime.jobmaster.slotpool.SlotPoolGateway; +import org.apache.flink.runtime.jobmaster.slotpool.SlotProvider; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobgraph.JobVertexID; +import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; +import org.apache.flink.runtime.jobmaster.JobMasterId; +import org.apache.flink.runtime.rpc.RpcService; +import org.apache.flink.runtime.rpc.RpcUtils; +import org.apache.flink.runtime.rpc.TestingRpcService; +import org.apache.flink.runtime.taskexecutor.slot.SlotOffer; +import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; +import org.apache.flink.runtime.testingUtils.TestingUtils; +import org.apache.flink.util.FlinkException; +import org.apache.flink.util.Preconditions; +import org.apache.flink.util.TestLogger; + +import 
org.junit.After; +import org.junit.Before; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestBase.SchedulerType.SCHEDULER; +import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestBase.SchedulerType.SLOT_POOL; +import static org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance; + +/** + * Test base for scheduler related test cases. The tests are + * executed with the {@link Scheduler} and the {@link SlotPool}. + */ +public class SchedulerTestBase extends TestLogger { + + protected TestingSlotProvider testingSlotProvider; + + private SchedulerType schedulerType; + + private RpcService rpcService; + + enum SchedulerType { + SCHEDULER, + SLOT_POOL + } + + @Parameterized.Parameters(name = "Scheduler type = {0}") + public static Collection schedulerTypes() { + return Arrays.asList( + new Object[]{SCHEDULER}, + new Object[]{SLOT_POOL}); + } + + protected SchedulerTestBase(SchedulerType schedulerType) { + this.schedulerType = Preconditions.checkNotNull(schedulerType); + rpcService = null; + } + + @Before + public void setup() throws Exception { + switch (schedulerType) { + case SCHEDULER: + testingSlotProvider = new TestingSchedulerSlotProvider( + new Scheduler( + TestingUtils.defaultExecutionContext())); + break; + case SLOT_POOL: + rpcService = new TestingRpcService(); + final JobID jobId = new JobID(); + final TestingSlotPool slotPool = new TestingSlotPool(rpcService, jobId); + testingSlotProvider = new TestingSlotPoolSlotProvider(slotPool); + + final JobMasterId jobMasterId = JobMasterId.generate(); + final String jobManagerAddress = "localhost"; + slotPool.start(jobMasterId, jobManagerAddress); + break; + } + } + + @After + public void teardown() throws Exception { + if
(testingSlotProvider != null) { + testingSlotProvider.shutdown(); + testingSlotProvider = null; + } + + if (rpcService != null) { + rpcService.stopService(); + rpcService = null; + } + } + + protected interface TestingSlotProvider extends SlotProvider { + TaskManagerLocation addTaskManager(int numberSlots); + + void releaseTaskManager(ResourceID resourceId); + + int getNumberOfAvailableSlots(); + + int getNumberOfLocalizedAssignments(); + + int getNumberOfNonLocalizedAssignments(); + + int getNumberOfUnconstrainedAssignments(); + + int getNumberOfHostLocalizedAssignments(); + + int getNumberOfSlots(SlotSharingGroup slotSharingGroup); + + int getNumberOfAvailableSlotsForGroup(SlotSharingGroup slotSharingGroup, JobVertexID jobVertexId); + + void shutdown() throws Exception; + } + + private static final class TestingSchedulerSlotProvider implements TestingSlotProvider { + private final Scheduler scheduler; + + private TestingSchedulerSlotProvider(Scheduler scheduler) { + this.scheduler = Preconditions.checkNotNull(scheduler); + } + + @Override + public CompletableFuture allocateSlot(ScheduledUnit task, boolean allowQueued, Collection preferredLocations) { + return scheduler.allocateSlot(task, allowQueued, preferredLocations); + } + + @Override + public TaskManagerLocation addTaskManager(int numberSlots) { + final Instance instance = getRandomInstance(numberSlots); + scheduler.newInstanceAvailable(instance); + + return instance.getTaskManagerLocation(); + } + + @Override + public void releaseTaskManager(ResourceID resourceId) { + final Instance instance = scheduler.getInstance(resourceId); + + if (instance != null) { + scheduler.instanceDied(instance); + } + } + + @Override + public int getNumberOfAvailableSlots() { + return scheduler.getNumberOfAvailableSlots(); + } + + @Override + public int getNumberOfLocalizedAssignments() { + return scheduler.getNumberOfLocalizedAssignments(); + } + + @Override + public int getNumberOfNonLocalizedAssignments() { + return 
scheduler.getNumberOfNonLocalizedAssignments(); + } + + @Override + public int getNumberOfUnconstrainedAssignments() { + return scheduler.getNumberOfUnconstrainedAssignments(); + } + + @Override + public int getNumberOfHostLocalizedAssignments() { + return 0; + } + + @Override + public int getNumberOfSlots(SlotSharingGroup slotSharingGroup) { + return slotSharingGroup.getTaskAssignment().getNumberOfSlots(); + } + + @Override + public int getNumberOfAvailableSlotsForGroup(SlotSharingGroup slotSharingGroup, JobVertexID jobVertexId) { + return slotSharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jobVertexId); + } + + @Override + public void shutdown() { + scheduler.shutdown(); + } + } + + private static final class TestingSlotPoolSlotProvider implements TestingSlotProvider { + + private final TestingSlotPool slotPool; + + private final SlotProvider slotProvider; + + private final AtomicInteger numberOfLocalizedAssignments; + + private final AtomicInteger numberOfNonLocalizedAssignments; + + private final AtomicInteger numberOfUnconstrainedAssignments; + + private final AtomicInteger numberOfHostLocalizedAssignments; + + private TestingSlotPoolSlotProvider(TestingSlotPool slotPool) { + this.slotPool = Preconditions.checkNotNull(slotPool); + this.slotProvider = slotPool.getSlotProvider(); + + this.numberOfLocalizedAssignments = new AtomicInteger(); + this.numberOfNonLocalizedAssignments = new AtomicInteger(); + this.numberOfUnconstrainedAssignments = new AtomicInteger(); + this.numberOfHostLocalizedAssignments = new AtomicInteger(); + } + + @Override + public TaskManagerLocation addTaskManager(int numberSlots) { + final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + final ResourceID resourceId = taskManagerLocation.getResourceID(); + final SlotPoolGateway slotPoolGateway = slotPool.getSelfGateway(SlotPoolGateway.class); + + try { + slotPoolGateway.registerTaskManager(resourceId).get(); + } catch (Exception e) { + throw new 
RuntimeException("Unexpected exception occurred. This indicates a programming bug.", e); + } + + final TaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway(); + final Collection slotOffers = new ArrayList<>(numberSlots); + + for (int i = 0; i < numberSlots; i++) { + final SlotOffer slotOffer = new SlotOffer( + new AllocationID(), + i, + ResourceProfile.UNKNOWN); + + slotOffers.add(slotOffer); + } + + final Collection acceptedSlotOffers; + + try { + acceptedSlotOffers = slotPoolGateway.offerSlots( + taskManagerLocation, + taskManagerGateway, + slotOffers).get(); + } catch (Exception e) { + throw new RuntimeException("Unexpected exception occurred. This indicates a programming bug.", e); + } + + Preconditions.checkState(acceptedSlotOffers.size() == numberSlots); + + return taskManagerLocation; + } + + @Override + public void releaseTaskManager(ResourceID resourceId) { + try { + slotPool.releaseTaskManager(resourceId).get(); + } catch (Exception e) { + throw new RuntimeException("Should not have happened.", e); + } + } + + @Override + public int getNumberOfAvailableSlots() { + try { + return slotPool.getNumberOfAvailableSlots().get(); + } catch (Exception e) { + throw new RuntimeException("Should not have happened.", e); + } + } + + @Override + public int getNumberOfLocalizedAssignments() { + return numberOfLocalizedAssignments.get(); + } + + @Override + public int getNumberOfNonLocalizedAssignments() { + return numberOfNonLocalizedAssignments.get(); + } + + @Override + public int getNumberOfUnconstrainedAssignments() { + return numberOfUnconstrainedAssignments.get(); + } + + @Override + public int getNumberOfHostLocalizedAssignments() { + return numberOfHostLocalizedAssignments.get(); + } + + @Override + public int getNumberOfSlots(SlotSharingGroup slotSharingGroup) { + try { + return slotPool.getNumberOfSharedSlots(slotSharingGroup.getSlotSharingGroupId()).get(); + } catch (Exception e) { + throw new RuntimeException("Should not have 
happened.", e); + } + } + + @Override + public int getNumberOfAvailableSlotsForGroup(SlotSharingGroup slotSharingGroup, JobVertexID jobVertexId) { + try { + return slotPool.getNumberOfAvailableSlotsForGroup(slotSharingGroup.getSlotSharingGroupId(), jobVertexId).get(); + } catch (Exception e) { + throw new RuntimeException("Should not have happened.", e); + } + } + + @Override + public void shutdown() throws Exception { + RpcUtils.terminateRpcEndpoint(slotPool, TestingUtils.TIMEOUT()); + } + + @Override + public CompletableFuture allocateSlot(ScheduledUnit task, boolean allowQueued, Collection preferredLocations) { + return slotProvider.allocateSlot(task, allowQueued, preferredLocations).thenApply( + (LogicalSlot logicalSlot) -> { + switch (logicalSlot.getLocality()) { + case LOCAL: + numberOfLocalizedAssignments.incrementAndGet(); + break; + case UNCONSTRAINED: + numberOfUnconstrainedAssignments.incrementAndGet(); + break; + case NON_LOCAL: + numberOfNonLocalizedAssignments.incrementAndGet(); + break; + case HOST_LOCAL: + numberOfHostLocalizedAssignments.incrementAndGet(); + break; + default: + // ignore + } + + return logicalSlot; + }); + } + } + + private static final class TestingSlotPool extends SlotPool { + + public TestingSlotPool(RpcService rpcService, JobID jobId) { + super(rpcService, jobId); + } + + CompletableFuture getNumberOfAvailableSlots() { + return callAsync( + () -> getAvailableSlots().size(), + TestingUtils.infiniteTime()); + } + + CompletableFuture getNumberOfSharedSlots(SlotSharingGroupId slotSharingGroupId) { + return callAsync( + () -> { + final SlotSharingManager multiTaskSlotManager = slotSharingManagers.get(slotSharingGroupId); + + if (multiTaskSlotManager != null) { + return multiTaskSlotManager.getResolvedRootSlots().size(); + } else { + throw new FlinkException("No MultiTaskSlotManager registered under " + slotSharingGroupId + '.'); + } + }, + TestingUtils.infiniteTime()); + } + + CompletableFuture 
getNumberOfAvailableSlotsForGroup(SlotSharingGroupId slotSharingGroupId, JobVertexID jobVertexId) { + return callAsync( + () -> { + final SlotSharingManager multiTaskSlotManager = slotSharingManagers.get(slotSharingGroupId); + + if (multiTaskSlotManager != null) { + int availableSlots = 0; + + for (SlotSharingManager.MultiTaskSlot multiTaskSlot : multiTaskSlotManager.getResolvedRootSlots()) { + if (!multiTaskSlot.contains(jobVertexId)) { + availableSlots++; + } + } + + return availableSlots; + } else { + throw new FlinkException("No MultiTaskSlotmanager registered under " + slotSharingGroupId + '.'); + } + }, + TestingUtils.infiniteTime()); + } + } + +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestUtils.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestUtils.java index 98dca034704d5..418625599fb8b 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestUtils.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/scheduler/SchedulerTestUtils.java @@ -18,9 +18,18 @@ package org.apache.flink.runtime.jobmanager.scheduler; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; +import org.apache.flink.api.common.JobID; +import org.apache.flink.runtime.clusterframework.types.ResourceID; +import org.apache.flink.runtime.executiongraph.Execution; +import org.apache.flink.runtime.executiongraph.ExecutionJobVertex; +import org.apache.flink.runtime.executiongraph.ExecutionVertex; +import org.apache.flink.runtime.instance.DummyActorGateway; +import org.apache.flink.runtime.instance.HardwareDescription; +import org.apache.flink.runtime.instance.Instance; +import org.apache.flink.runtime.instance.InstanceID; +import org.apache.flink.runtime.jobgraph.JobVertexID; +import org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway; +import 
org.apache.flink.runtime.taskmanager.TaskManagerLocation; import java.net.InetAddress; import java.net.UnknownHostException; @@ -33,17 +42,9 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.flink.runtime.clusterframework.types.ResourceID; -import org.apache.flink.runtime.executiongraph.Execution; -import org.apache.flink.runtime.executiongraph.ExecutionVertex; -import org.apache.flink.runtime.instance.DummyActorGateway; -import org.apache.flink.runtime.instance.HardwareDescription; -import org.apache.flink.runtime.instance.Instance; -import org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway; -import org.apache.flink.runtime.taskmanager.TaskManagerLocation; -import org.apache.flink.runtime.instance.InstanceID; -import org.apache.flink.api.common.JobID; -import org.apache.flink.runtime.jobgraph.JobVertexID; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class SchedulerTestUtils { @@ -83,9 +84,13 @@ public static Instance getRandomInstance(int numSlots) { public static Execution getDummyTask() { + ExecutionJobVertex executionJobVertex = mock(ExecutionJobVertex.class); + ExecutionVertex vertex = mock(ExecutionVertex.class); when(vertex.getJobId()).thenReturn(new JobID()); when(vertex.toString()).thenReturn("TEST-VERTEX"); + when(vertex.getJobVertex()).thenReturn(executionJobVertex); + when(vertex.getJobvertexId()).thenReturn(new JobVertexID()); Execution execution = mock(Execution.class); when(execution.getVertex()).thenReturn(vertex); @@ -117,11 +122,14 @@ public static Execution getTestVertex(Iterable preferredLoc } public static Execution getTestVertex(Collection> preferredLocationFutures) { + ExecutionJobVertex executionJobVertex = mock(ExecutionJobVertex.class); ExecutionVertex vertex = mock(ExecutionVertex.class); 
when(vertex.getPreferredLocationsBasedOnInputs()).thenReturn(preferredLocationFutures); when(vertex.getJobId()).thenReturn(new JobID()); when(vertex.toString()).thenReturn("TEST-VERTEX"); + when(vertex.getJobVertex()).thenReturn(executionJobVertex); + when(vertex.getJobvertexId()).thenReturn(new JobVertexID()); Execution execution = mock(Execution.class); when(execution.getVertex()).thenReturn(vertex); @@ -130,9 +138,11 @@ public static Execution getTestVertex(Collection> preferrecLocationFutures = new ArrayList<>(locations.length); + Collection> preferredLocationFutures = new ArrayList<>(locations.length); for (TaskManagerLocation location : locations) { - preferrecLocationFutures.add(CompletableFuture.completedFuture(location)); + preferredLocationFutures.add(CompletableFuture.completedFuture(location)); } - when(vertex.getPreferredLocationsBasedOnInputs()).thenReturn(preferrecLocationFutures); + when(vertex.getJobVertex()).thenReturn(executionJobVertex); + when(vertex.getPreferredLocationsBasedOnInputs()).thenReturn(preferredLocationFutures); when(vertex.getJobId()).thenReturn(new JobID()); when(vertex.getJobvertexId()).thenReturn(jid); when(vertex.getParallelSubtaskIndex()).thenReturn(taskIndex); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/DummySlotOwner.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/DummySlotOwner.java index 6d17ad01a0e07..add1ec2cde7f1 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/DummySlotOwner.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/DummySlotOwner.java @@ -18,7 +18,8 @@ package org.apache.flink.runtime.jobmanager.slots; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotOwner; import java.util.concurrent.CompletableFuture; diff --git 
a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/TestingSlotOwner.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/TestingSlotOwner.java index e7f9485f68dad..727c0b5723af4 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/TestingSlotOwner.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmanager/slots/TestingSlotOwner.java @@ -18,7 +18,8 @@ package org.apache.flink.runtime.jobmanager.slots; -import org.apache.flink.runtime.instance.LogicalSlot; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotOwner; import java.util.concurrent.CompletableFuture; import java.util.function.Consumer; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingLogicalSlot.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingLogicalSlot.java similarity index 79% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingLogicalSlot.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingLogicalSlot.java index 20660170ca153..e20700ee9edf6 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingLogicalSlot.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingLogicalSlot.java @@ -16,10 +16,12 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; @@ -31,7 +33,7 @@ import java.util.concurrent.atomic.AtomicReference; /** - * Simple testing logical slot for testing purposes. + * Simple logical slot for testing purposes. */ public class TestingLogicalSlot implements LogicalSlot { @@ -47,7 +49,9 @@ public class TestingLogicalSlot implements LogicalSlot { private final AllocationID allocationId; - private final SlotRequestID slotRequestId; + private final SlotRequestId slotRequestId; + + private final SlotSharingGroupId slotSharingGroupId; public TestingLogicalSlot() { this( @@ -55,7 +59,8 @@ public TestingLogicalSlot() { new SimpleAckingTaskManagerGateway(), 0, new AllocationID(), - new SlotRequestID()); + new SlotRequestId(), + new SlotSharingGroupId()); } public TestingLogicalSlot( @@ -63,13 +68,15 @@ public TestingLogicalSlot( TaskManagerGateway taskManagerGateway, int slotNumber, AllocationID allocationId, - SlotRequestID slotRequestId) { + SlotRequestId slotRequestId, + SlotSharingGroupId slotSharingGroupId) { this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation); this.taskManagerGateway = Preconditions.checkNotNull(taskManagerGateway); this.payloadReference = new AtomicReference<>(); this.slotNumber = slotNumber; this.allocationId = Preconditions.checkNotNull(allocationId); this.slotRequestId = Preconditions.checkNotNull(slotRequestId); + this.slotSharingGroupId = Preconditions.checkNotNull(slotSharingGroupId); } 
@Override @@ -82,6 +89,11 @@ public TaskManagerGateway getTaskManagerGateway() { return taskManagerGateway; } + @Override + public Locality getLocality() { + return Locality.UNKNOWN; + } + @Override public boolean isAlive() { return !releaseFuture.isDone(); @@ -99,7 +111,7 @@ public Payload getPayload() { } @Override - public CompletableFuture releaseSlot() { + public CompletableFuture releaseSlot(@Nullable Throwable cause) { releaseFuture.complete(null); return releaseFuture; @@ -116,7 +128,13 @@ public AllocationID getAllocationId() { } @Override - public SlotRequestID getSlotRequestId() { + public SlotRequestId getSlotRequestId() { return slotRequestId; } + + @Nullable + @Override + public SlotSharingGroupId getSlotSharingGroupId() { + return slotSharingGroupId; + } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingPayload.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingPayload.java similarity index 96% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingPayload.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingPayload.java index 3369882669f1d..a59f7650b24c2 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/TestingPayload.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/TestingPayload.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster; import java.util.concurrent.CompletableFuture; diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/AllocatedSlotsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotsTest.java similarity index 94% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/AllocatedSlotsTest.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotsTest.java index 223d43c42b8e4..4dee92441daf9 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/AllocatedSlotsTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AllocatedSlotsTest.java @@ -16,13 +16,13 @@ * limitations under the License. */ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; -import org.apache.flink.runtime.jobmanager.slots.DummySlotOwner; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; import org.apache.flink.util.TestLogger; @@ -41,7 +41,7 @@ public void testOperations() throws Exception { SlotPool.AllocatedSlots allocatedSlots = new SlotPool.AllocatedSlots(); final AllocationID allocation1 = new AllocationID(); - final SlotRequestID slotRequestID = new SlotRequestID(); + final SlotRequestId slotRequestID = new SlotRequestId(); final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); final ResourceID resource1 = taskManagerLocation.getResourceID(); 
final AllocatedSlot slot1 = createSlot(allocation1, taskManagerLocation); @@ -56,7 +56,7 @@ public void testOperations() throws Exception { assertEquals(1, allocatedSlots.size()); final AllocationID allocation2 = new AllocationID(); - final SlotRequestID slotRequestID2 = new SlotRequestID(); + final SlotRequestId slotRequestID2 = new SlotRequestId(); final AllocatedSlot slot2 = createSlot(allocation2, taskManagerLocation); allocatedSlots.add(slotRequestID2, slot2); @@ -71,7 +71,7 @@ public void testOperations() throws Exception { assertEquals(2, allocatedSlots.size()); final AllocationID allocation3 = new AllocationID(); - final SlotRequestID slotRequestID3 = new SlotRequestID(); + final SlotRequestId slotRequestID3 = new SlotRequestId(); final TaskManagerLocation taskManagerLocation2 = new LocalTaskManagerLocation(); final ResourceID resource2 = taskManagerLocation2.getResourceID(); final AllocatedSlot slot3 = createSlot(allocation3, taskManagerLocation2); @@ -143,7 +143,6 @@ private AllocatedSlot createSlot(final AllocationID allocationId, final TaskMana taskManagerLocation, 0, ResourceProfile.UNKNOWN, - new SimpleAckingTaskManagerGateway(), - new DummySlotOwner()); + new SimpleAckingTaskManagerGateway()); } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/AvailableSlotsTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AvailableSlotsTest.java similarity index 95% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/AvailableSlotsTest.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AvailableSlotsTest.java index 9ede8997173a8..4835c57c56068 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/AvailableSlotsTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/AvailableSlotsTest.java @@ -16,12 +16,11 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; -import org.apache.flink.runtime.jobmanager.slots.DummySlotOwner; import org.apache.flink.runtime.jobmanager.slots.SlotAndLocality; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.taskmanager.TaskManagerLocation; @@ -99,7 +98,7 @@ public void testPollFreeSlot() { assertNull(availableSlots.poll(DEFAULT_TESTING_BIG_PROFILE, null)); SlotAndLocality slotAndLocality = availableSlots.poll(DEFAULT_TESTING_PROFILE, null); - assertEquals(slot1, slotAndLocality.slot()); + assertEquals(slot1, slotAndLocality.getSlot()); assertEquals(0, availableSlots.size()); assertFalse(availableSlots.contains(slot1.getAllocationId())); assertFalse(availableSlots.containsTaskManager(resource1)); @@ -116,7 +115,6 @@ static AllocatedSlot createAllocatedSlot(final ResourceID resourceId) { mockTaskManagerLocation, 0, DEFAULT_TESTING_PROFILE, - mockTaskManagerGateway, - new DummySlotOwner()); + mockTaskManagerGateway); } } diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolCoLocationTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolCoLocationTest.java new file mode 100644 index 0000000000000..7454e3e178bc6 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolCoLocationTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.clusterframework.types.ResourceProfile; +import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobgraph.JobVertexID; +import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint; +import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup; +import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.resourcemanager.SlotRequest; +import org.apache.flink.runtime.taskexecutor.slot.SlotOffer; +import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; + +import org.junit.Test; + +import java.util.Collections; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; + +/** + * Test cases for {@link CoLocationConstraint} with the {@link 
SlotPool}. + */ +public class SlotPoolCoLocationTest extends SlotPoolSchedulingTestBase { + + /** + * Tests the scheduling of two tasks with a parallelism of 2 and a co-location constraint. + */ + @Test + public void testSimpleCoLocatedSlotScheduling() throws ExecutionException, InterruptedException { + final BlockingQueue allocationIds = new ArrayBlockingQueue<>(2); + + testingResourceManagerGateway.setRequestSlotConsumer( + (SlotRequest slotRequest) -> allocationIds.offer(slotRequest.getAllocationId())); + + final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + + slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()).get(); + + CoLocationGroup group = new CoLocationGroup(); + CoLocationConstraint coLocationConstraint1 = group.getLocationConstraint(0); + CoLocationConstraint coLocationConstraint2 = group.getLocationConstraint(1); + + final SlotSharingGroupId slotSharingGroupId = new SlotSharingGroupId(); + + JobVertexID jobVertexId1 = new JobVertexID(); + JobVertexID jobVertexId2 = new JobVertexID(); + + CompletableFuture logicalSlotFuture11 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId1, + slotSharingGroupId, + coLocationConstraint1), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture22 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId2, + slotSharingGroupId, + coLocationConstraint2), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture12 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId2, + slotSharingGroupId, + coLocationConstraint1), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture21 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId1, + slotSharingGroupId, + coLocationConstraint2), + true, + Collections.emptyList()); + + final AllocationID allocationId1 = allocationIds.take(); + final AllocationID allocationId2 = allocationIds.take(); + + CompletableFuture slotOfferFuture1 = 
slotPoolGateway.offerSlot( + taskManagerLocation, + new SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId1, + 0, + ResourceProfile.UNKNOWN)); + + CompletableFuture slotOfferFuture2 = slotPoolGateway.offerSlot( + taskManagerLocation, + new SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId2, + 0, + ResourceProfile.UNKNOWN)); + + assertTrue(slotOfferFuture1.get()); + assertTrue(slotOfferFuture2.get()); + + LogicalSlot logicalSlot11 = logicalSlotFuture11.get(); + LogicalSlot logicalSlot12 = logicalSlotFuture12.get(); + LogicalSlot logicalSlot21 = logicalSlotFuture21.get(); + LogicalSlot logicalSlot22 = logicalSlotFuture22.get(); + + assertEquals(logicalSlot11.getAllocationId(), logicalSlot12.getAllocationId()); + assertEquals(logicalSlot21.getAllocationId(), logicalSlot22.getAllocationId()); + assertNotEquals(logicalSlot11.getAllocationId(), logicalSlot21.getAllocationId()); + } +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolRpcTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolRpcTest.java similarity index 87% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolRpcTest.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolRpcTest.java index 60e1d342b13e4..2d862c52561b8 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolRpcTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolRpcTest.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.api.common.JobID; import org.apache.flink.api.common.time.Time; @@ -24,11 +24,14 @@ import org.apache.flink.runtime.akka.AkkaUtils; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SlotSharingGroupId; import org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.jobmaster.JobMasterId; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.messages.Acknowledge; import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway; import org.apache.flink.runtime.resourcemanager.SlotRequest; @@ -52,12 +55,14 @@ import org.junit.BeforeClass; import org.junit.Test; +import javax.annotation.Nullable; + import java.util.Collections; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.function.Consumer; -import static org.apache.flink.runtime.instance.AvailableSlotsTest.DEFAULT_TESTING_PROFILE; +import static org.apache.flink.runtime.jobmaster.slotpool.AvailableSlotsTest.DEFAULT_TESTING_PROFILE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -111,10 +116,11 @@ public void testSlotAllocationNoResourceManager() throws Exception { pool.start(JobMasterId.generate(), "foobar"); CompletableFuture future = pool.allocateSlot( - new SlotRequestID(), + new SlotRequestId(), new 
ScheduledUnit(SchedulerTestUtils.getDummyTask()), DEFAULT_TESTING_PROFILE, Collections.emptyList(), + true, TestingUtils.infiniteTime()); try { @@ -144,12 +150,13 @@ public void testCancelSlotAllocationWithoutResourceManager() throws Exception { pool.start(JobMasterId.generate(), "foobar"); SlotPoolGateway slotPoolGateway = pool.getSelfGateway(SlotPoolGateway.class); - SlotRequestID requestId = new SlotRequestID(); + SlotRequestId requestId = new SlotRequestId(); CompletableFuture future = slotPoolGateway.allocateSlot( requestId, new ScheduledUnit(SchedulerTestUtils.getDummyTask()), DEFAULT_TESTING_PROFILE, Collections.emptyList(), + true, Time.milliseconds(10L)); try { @@ -161,7 +168,7 @@ public void testCancelSlotAllocationWithoutResourceManager() throws Exception { assertEquals(1L, (long) pool.getNumberOfWaitingForResourceRequests().get()); - slotPoolGateway.cancelSlotRequest(requestId).get(); + slotPoolGateway.releaseSlot(requestId, null, null).get(); assertEquals(0L, (long) pool.getNumberOfWaitingForResourceRequests().get()); } finally { @@ -188,12 +195,13 @@ public void testCancelSlotAllocationWithResourceManager() throws Exception { ResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); pool.connectToResourceManager(resourceManagerGateway); - SlotRequestID requestId = new SlotRequestID(); + SlotRequestId requestId = new SlotRequestId(); CompletableFuture future = slotPoolGateway.allocateSlot( requestId, new ScheduledUnit(SchedulerTestUtils.getDummyTask()), DEFAULT_TESTING_PROFILE, Collections.emptyList(), + true, Time.milliseconds(10L)); try { @@ -205,7 +213,7 @@ public void testCancelSlotAllocationWithResourceManager() throws Exception { assertEquals(1L, (long) pool.getNumberOfPendingRequests().get()); - slotPoolGateway.cancelSlotRequest(requestId).get(); + slotPoolGateway.releaseSlot(requestId, null, null).get(); assertEquals(0L, (long) pool.getNumberOfPendingRequests().get()); } finally { RpcUtils.terminateRpcEndpoint(pool, 
timeout); @@ -239,12 +247,13 @@ public void testCancelSlotAllocationWhileSlotFulfilled() throws Exception { pool.connectToResourceManager(resourceManagerGateway); - SlotRequestID requestId = new SlotRequestID(); + SlotRequestId requestId = new SlotRequestId(); CompletableFuture future = slotPoolGateway.allocateSlot( requestId, new ScheduledUnit(SchedulerTestUtils.getDummyTask()), DEFAULT_TESTING_PROFILE, Collections.emptyList(), + true, Time.milliseconds(10L)); try { @@ -270,7 +279,7 @@ public void testCancelSlotAllocationWhileSlotFulfilled() throws Exception { assertTrue(pool.containsAllocatedSlot(allocationId).get()); - pool.cancelSlotRequest(requestId).get(); + pool.releaseSlot(requestId, null, null).get(); assertFalse(pool.containsAllocatedSlot(allocationId).get()); assertTrue(pool.containsAvailableSlot(allocationId).get()); @@ -295,10 +304,10 @@ public void testProviderAndOwner() throws Exception { TestingUtils.infiniteTime(), TestingUtils.infiniteTime()); - final CompletableFuture cancelFuture = new CompletableFuture<>(); + final CompletableFuture releaseSlotFuture = new CompletableFuture<>(); - pool.setCancelSlotAllocationConsumer( - slotRequestID -> cancelFuture.complete(slotRequestID)); + pool.setReleaseSlotConsumer( + slotRequestID -> releaseSlotFuture.complete(slotRequestID)); try { pool.start(JobMasterId.generate(), "foobar"); @@ -321,7 +330,7 @@ public void testProviderAndOwner() throws Exception { } // wait for the cancel call on the SlotPool - cancelFuture.get(); + releaseSlotFuture.get(); assertEquals(0L, (long) pool.getNumberOfPendingRequests().get()); } finally { @@ -334,7 +343,7 @@ public void testProviderAndOwner() throws Exception { */ private static final class TestingSlotPool extends SlotPool { - private volatile Consumer cancelSlotAllocationConsumer; + private volatile Consumer releaseSlotConsumer; public TestingSlotPool( RpcService rpcService, @@ -351,22 +360,25 @@ public TestingSlotPool( resourceManagerAllocationTimeout, 
resourceManagerRequestTimeout); - cancelSlotAllocationConsumer = null; + releaseSlotConsumer = null; } - public void setCancelSlotAllocationConsumer(Consumer cancelSlotAllocationConsumer) { - this.cancelSlotAllocationConsumer = Preconditions.checkNotNull(cancelSlotAllocationConsumer); + public void setReleaseSlotConsumer(Consumer releaseSlotConsumer) { + this.releaseSlotConsumer = Preconditions.checkNotNull(releaseSlotConsumer); } @Override - public CompletableFuture cancelSlotRequest(SlotRequestID slotRequestId) { - final Consumer currentCancelSlotAllocationConsumer = cancelSlotAllocationConsumer; - - if (currentCancelSlotAllocationConsumer != null) { - currentCancelSlotAllocationConsumer.accept(slotRequestId); + public CompletableFuture releaseSlot( + SlotRequestId slotRequestId, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable Throwable cause) { + final Consumer currentReleaseSlotConsumer = releaseSlotConsumer; + + if (currentReleaseSlotConsumer != null) { + currentReleaseSlotConsumer.accept(slotRequestId); } - return super.cancelSlotRequest(slotRequestId); + return super.releaseSlot(slotRequestId, slotSharingGroupId, cause); } CompletableFuture containsAllocatedSlot(AllocationID allocationId) { diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSchedulingTestBase.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSchedulingTestBase.java new file mode 100644 index 0000000000000..31be1ae862714 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSchedulingTestBase.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.api.common.JobID; +import org.apache.flink.runtime.jobmaster.JobMasterId; +import org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway; +import org.apache.flink.runtime.rpc.RpcUtils; +import org.apache.flink.runtime.rpc.TestingRpcService; +import org.apache.flink.runtime.testingUtils.TestingUtils; +import org.apache.flink.util.TestLogger; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; + +/** + * Test base for {@link SlotPool} related scheduling test cases. 
+ */ +public class SlotPoolSchedulingTestBase extends TestLogger { + + private static final JobID jobId = new JobID(); + + private static final JobMasterId jobMasterId = new JobMasterId(); + + private static final String jobMasterAddress = "foobar"; + + private static TestingRpcService testingRpcService; + + protected SlotPool slotPool; + + protected SlotPoolGateway slotPoolGateway; + + protected SlotProvider slotProvider; + + protected TestingResourceManagerGateway testingResourceManagerGateway; + + @BeforeClass + public static void setup() { + testingRpcService = new TestingRpcService(); + } + + @AfterClass + public static void teardown() { + if (testingRpcService != null) { + testingRpcService.stopService(); + testingRpcService = null; + } + } + + @Before + public void setupBefore() throws Exception { + testingResourceManagerGateway = new TestingResourceManagerGateway(); + + slotPool = new SlotPool( + testingRpcService, + jobId); + + slotPool.start(jobMasterId, jobMasterAddress); + + slotPoolGateway = slotPool.getSelfGateway(SlotPoolGateway.class); + + slotProvider = slotPool.getSlotProvider(); + + slotPool.connectToResourceManager(testingResourceManagerGateway); + } + + @After + public void teardownAfter() throws InterruptedException, ExecutionException, TimeoutException { + if (slotPool != null) { + RpcUtils.terminateRpcEndpoint(slotPool, TestingUtils.TIMEOUT()); + slotPool = null; + } + } +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSlotSharingTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSlotSharingTest.java new file mode 100644 index 0000000000000..bb6c2b72d0d94 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolSlotSharingTest.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.clusterframework.types.ResourceProfile; +import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobgraph.JobVertexID; +import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.resourcemanager.SlotRequest; +import org.apache.flink.runtime.taskexecutor.slot.SlotOffer; +import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; +import org.apache.flink.runtime.taskmanager.TaskManagerLocation; +import org.apache.flink.util.ExceptionUtils; +import org.apache.flink.util.FlinkException; + +import org.junit.Test; + +import java.util.Collections; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static 
org.junit.Assert.fail; + +/** + * Test cases for slot sharing with the {@link SlotPool}. + */ +public class SlotPoolSlotSharingTest extends SlotPoolSchedulingTestBase { + + @Test + public void testSingleQueuedSharedSlotScheduling() throws Exception { + final CompletableFuture allocationIdFuture = new CompletableFuture<>(); + testingResourceManagerGateway.setRequestSlotConsumer( + (SlotRequest slotRequest) -> allocationIdFuture.complete(slotRequest.getAllocationId())); + + LocalTaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()).get(); + + SlotSharingGroupId slotSharingGroupId = new SlotSharingGroupId(); + CompletableFuture logicalSlotFuture = slotProvider.allocateSlot( + new ScheduledUnit( + new JobVertexID(), + slotSharingGroupId, + null), + true, + Collections.emptyList()); + + assertFalse(logicalSlotFuture.isDone()); + + final AllocationID allocationId = allocationIdFuture.get(); + + CompletableFuture booleanCompletableFuture = slotPoolGateway.offerSlot( + taskManagerLocation, + new SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId, + 0, + ResourceProfile.UNKNOWN)); + + assertTrue(booleanCompletableFuture.get()); + + final LogicalSlot logicalSlot = logicalSlotFuture.get(); + + assertEquals(slotSharingGroupId, logicalSlot.getSlotSharingGroupId()); + } + + /** + * Tests that returned slot futures are failed if the allocation request is failed. 
+ */ + @Test + public void testFailingQueuedSharedSlotScheduling() throws ExecutionException, InterruptedException { + final CompletableFuture allocationIdFuture = new CompletableFuture<>(); + testingResourceManagerGateway.setRequestSlotConsumer( + (SlotRequest slotRequest) -> allocationIdFuture.complete(slotRequest.getAllocationId())); + + CompletableFuture logicalSlotFuture = slotProvider.allocateSlot( + new ScheduledUnit( + new JobVertexID(), + new SlotSharingGroupId(), + null), + true, + Collections.emptyList()); + + final AllocationID allocationId = allocationIdFuture.get(); + + // this should fail the returned logical slot future + slotPoolGateway.failAllocation(allocationId, new FlinkException("Testing Exception")); + + try { + logicalSlotFuture.get(); + fail("The slot future should have failed."); + } catch (ExecutionException ee) { + assertTrue(ExceptionUtils.findThrowable(ee, FlinkException.class).isPresent()); + } + } + + /** + * Tests queued slot scheduling with a single slot sharing group + */ + @Test + public void testQueuedSharedSlotScheduling() throws InterruptedException, ExecutionException { + final BlockingQueue allocationIds = new ArrayBlockingQueue<>(2); + testingResourceManagerGateway.setRequestSlotConsumer( + (SlotRequest slotRequest) -> allocationIds.offer(slotRequest.getAllocationId())); + + final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + + slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()).get(); + + final SlotSharingGroupId slotSharingGroupId = new SlotSharingGroupId(); + final JobVertexID jobVertexId1 = new JobVertexID(); + final JobVertexID jobVertexId2 = new JobVertexID(); + + CompletableFuture logicalSlotFuture1 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId1, + slotSharingGroupId, + null), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture2 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId2, + slotSharingGroupId, + null), + 
true, + Collections.emptyList()); + + assertFalse(logicalSlotFuture1.isDone()); + assertFalse(logicalSlotFuture2.isDone()); + + final AllocationID allocationId1 = allocationIds.take(); + + CompletableFuture logicalSlotFuture3 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId1, + slotSharingGroupId, + null), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture4 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId2, + slotSharingGroupId, + null), + true, + Collections.emptyList()); + + assertFalse(logicalSlotFuture3.isDone()); + assertFalse(logicalSlotFuture4.isDone()); + + final AllocationID allocationId2 = allocationIds.take(); + + // this should fulfill the first two slot futures + CompletableFuture offerFuture = slotPoolGateway.offerSlot( + taskManagerLocation, + new SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId1, + 0, + ResourceProfile.UNKNOWN)); + + assertTrue(offerFuture.get()); + + LogicalSlot logicalSlot1 = logicalSlotFuture1.get(); + LogicalSlot logicalSlot2 = logicalSlotFuture2.get(); + + assertEquals(logicalSlot1.getTaskManagerLocation(), logicalSlot2.getTaskManagerLocation()); + assertEquals(allocationId1, logicalSlot1.getAllocationId()); + assertEquals(allocationId1, logicalSlot2.getAllocationId()); + + assertFalse(logicalSlotFuture3.isDone()); + assertFalse(logicalSlotFuture4.isDone()); + + // release the shared slot by releasing the individual tasks + logicalSlot1.releaseSlot(null); + logicalSlot2.releaseSlot(null); + + LogicalSlot logicalSlot3 = logicalSlotFuture3.get(); + LogicalSlot logicalSlot4 = logicalSlotFuture4.get(); + + assertEquals(logicalSlot3.getTaskManagerLocation(), logicalSlot4.getTaskManagerLocation()); + assertEquals(allocationId1, logicalSlot3.getAllocationId()); + assertEquals(allocationId1, logicalSlot4.getAllocationId()); + } + + /** + * Tests queued slot scheduling with multiple slot sharing groups. 
+ */ + @Test + public void testQueuedMultipleSlotSharingGroups() throws ExecutionException, InterruptedException { + final BlockingQueue allocationIds = new ArrayBlockingQueue<>(4); + + testingResourceManagerGateway.setRequestSlotConsumer( + (SlotRequest slotRequest) -> allocationIds.offer(slotRequest.getAllocationId())); + + final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + final SlotSharingGroupId slotSharingGroupId1 = new SlotSharingGroupId(); + final SlotSharingGroupId slotSharingGroupId2 = new SlotSharingGroupId(); + final JobVertexID jobVertexId1 = new JobVertexID(); + final JobVertexID jobVertexId2 = new JobVertexID(); + final JobVertexID jobVertexId3 = new JobVertexID(); + final JobVertexID jobVertexId4 = new JobVertexID(); + + slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()).get(); + + CompletableFuture logicalSlotFuture1 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId1, + slotSharingGroupId1, + null), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture2 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId2, + slotSharingGroupId1, + null), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture3 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId3, + slotSharingGroupId2, + null), + true, + Collections.emptyList()); + + CompletableFuture logicalSlotFuture4 = slotProvider.allocateSlot( + new ScheduledUnit( + jobVertexId4, + slotSharingGroupId2, + null), + true, + Collections.emptyList()); + + assertFalse(logicalSlotFuture1.isDone()); + assertFalse(logicalSlotFuture2.isDone()); + assertFalse(logicalSlotFuture3.isDone()); + assertFalse(logicalSlotFuture4.isDone()); + + // we expect two slot requests + final AllocationID allocationId1 = allocationIds.take(); + final AllocationID allocationId2 = allocationIds.take(); + + CompletableFuture offerFuture1 = slotPoolGateway.offerSlot( + taskManagerLocation, + new 
SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId1, + 0, + ResourceProfile.UNKNOWN)); + + CompletableFuture offerFuture2 = slotPoolGateway.offerSlot( + taskManagerLocation, + new SimpleAckingTaskManagerGateway(), + new SlotOffer( + allocationId2, + 0, + ResourceProfile.UNKNOWN)); + + assertTrue(offerFuture1.get()); + assertTrue(offerFuture2.get()); + + LogicalSlot logicalSlot1 = logicalSlotFuture1.get(); + LogicalSlot logicalSlot2 = logicalSlotFuture2.get(); + LogicalSlot logicalSlot3 = logicalSlotFuture3.get(); + LogicalSlot logicalSlot4 = logicalSlotFuture4.get(); + + assertEquals(logicalSlot1.getTaskManagerLocation(), logicalSlot2.getTaskManagerLocation()); + assertEquals(logicalSlot3.getTaskManagerLocation(), logicalSlot4.getTaskManagerLocation()); + + assertEquals(allocationId1, logicalSlot1.getAllocationId()); + assertEquals(allocationId2, logicalSlot3.getAllocationId()); + } + +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolTest.java similarity index 87% rename from flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolTest.java rename to flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolTest.java index 1af9cce834bba..707ea00008fa8 100644 --- a/flink-runtime/src/test/java/org/apache/flink/runtime/instance/SlotPoolTest.java +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPoolTest.java @@ -16,17 +16,20 @@ * limitations under the License. 
*/ -package org.apache.flink.runtime.instance; +package org.apache.flink.runtime.jobmaster.slotpool; import org.apache.flink.api.common.JobID; import org.apache.flink.api.common.time.Time; import org.apache.flink.runtime.clusterframework.types.AllocationID; import org.apache.flink.runtime.clusterframework.types.ResourceProfile; -import org.apache.flink.runtime.executiongraph.Execution; import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobgraph.JobVertexID; import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit; import org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway; import org.apache.flink.runtime.jobmaster.JobMasterId; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotRequestId; import org.apache.flink.runtime.messages.Acknowledge; import org.apache.flink.runtime.resourcemanager.ResourceManagerGateway; import org.apache.flink.runtime.resourcemanager.SlotRequest; @@ -49,14 +52,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + import java.util.Collections; import java.util.List; -import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import static org.apache.flink.runtime.instance.AvailableSlotsTest.DEFAULT_TESTING_PROFILE; +import static org.apache.flink.runtime.jobmaster.slotpool.AvailableSlotsTest.DEFAULT_TESTING_PROFILE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; @@ -105,8 +109,14 @@ public void testAllocateSimpleSlot() throws Exception { SlotPoolGateway slotPoolGateway = setupSlotPool(slotPool, resourceManagerGateway); 
slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()); - SlotRequestID requestId = new SlotRequestID(); - CompletableFuture future = slotPoolGateway.allocateSlot(requestId, mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + SlotRequestId requestId = new SlotRequestId(); + CompletableFuture future = slotPoolGateway.allocateSlot( + requestId, + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); assertFalse(future.isDone()); ArgumentCaptor slotRequestArgumentCaptor = ArgumentCaptor.forClass(SlotRequest.class); @@ -139,8 +149,20 @@ public void testAllocationFulfilledByReturnedSlot() throws Exception { SlotPoolGateway slotPoolGateway = setupSlotPool(slotPool, resourceManagerGateway); slotPool.registerTaskManager(taskManagerLocation.getResourceID()); - CompletableFuture future1 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); - CompletableFuture future2 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future1 = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); + CompletableFuture future2 = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); assertFalse(future1.isDone()); assertFalse(future2.isDone()); @@ -189,7 +211,13 @@ public void testAllocateWithFreeSlot() throws Exception { SlotPoolGateway slotPoolGateway = setupSlotPool(slotPool, resourceManagerGateway); slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()); - CompletableFuture future1 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future1 = 
slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); assertFalse(future1.isDone()); ArgumentCaptor slotRequestArgumentCaptor = ArgumentCaptor.forClass(SlotRequest.class); @@ -210,7 +238,13 @@ public void testAllocateWithFreeSlot() throws Exception { // return this slot to pool slot1.releaseSlot(); - CompletableFuture future2 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future2 = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); // second allocation fulfilled by previous slot returning LogicalSlot slot2 = future2.get(1, TimeUnit.SECONDS); @@ -235,7 +269,13 @@ public void testOfferSlot() throws Exception { SlotPoolGateway slotPoolGateway = setupSlotPool(slotPool, resourceManagerGateway); slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()); - CompletableFuture future = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); assertFalse(future.isDone()); ArgumentCaptor slotRequestArgumentCaptor = ArgumentCaptor.forClass(SlotRequest.class); @@ -286,10 +326,18 @@ public void testReleaseResource() throws Exception { final SlotPool slotPool = new SlotPool(rpcService, jobId) { @Override - public void returnAllocatedSlot(SlotRequestID slotRequestId) { - super.returnAllocatedSlot(slotRequestId); + public CompletableFuture releaseSlot( + SlotRequestId slotRequestId, + @Nullable SlotSharingGroupId slotSharingGroupId, + @Nullable Throwable cause) { + super.releaseSlot( + slotRequestId, + slotSharingGroupId, + cause); 
slotReturnFuture.complete(true); + + return CompletableFuture.completedFuture(Acknowledge.get()); } }; @@ -297,14 +345,26 @@ public void returnAllocatedSlot(SlotRequestID slotRequestId) { SlotPoolGateway slotPoolGateway = setupSlotPool(slotPool, resourceManagerGateway); slotPoolGateway.registerTaskManager(taskManagerLocation.getResourceID()); - CompletableFuture future1 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future1 = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); ArgumentCaptor slotRequestArgumentCaptor = ArgumentCaptor.forClass(SlotRequest.class); verify(resourceManagerGateway, Mockito.timeout(timeout.toMilliseconds())).requestSlot(any(JobMasterId.class), slotRequestArgumentCaptor.capture(), any(Time.class)); final SlotRequest slotRequest = slotRequestArgumentCaptor.getValue(); - CompletableFuture future2 = slotPoolGateway.allocateSlot(new SlotRequestID(), mock(ScheduledUnit.class), DEFAULT_TESTING_PROFILE, null, timeout); + CompletableFuture future2 = slotPoolGateway.allocateSlot( + new SlotRequestId(), + mock(ScheduledUnit.class), + DEFAULT_TESTING_PROFILE, + Collections.emptyList(), + true, + timeout); final SlotOffer slotOffer = new SlotOffer( slotRequest.getAllocationId(), @@ -349,7 +409,10 @@ public void testSlotRequestCancellationUponFailingRequest() throws Exception { resourceManagerGateway.setRequestSlotConsumer(slotRequest -> requestSlotFutureAllocationId.complete(slotRequest.getAllocationId())); resourceManagerGateway.setCancelSlotConsumer(allocationID -> cancelSlotFuture.complete(allocationID)); - final ScheduledUnit scheduledUnit = new ScheduledUnit(mock(Execution.class)); + final ScheduledUnit scheduledUnit = new ScheduledUnit( + new JobVertexID(), + null, + null); try { slotPool.start(JobMasterId.generate(), "localhost"); @@ -359,10 +422,11 
@@ public void testSlotRequestCancellationUponFailingRequest() throws Exception { slotPoolGateway.connectToResourceManager(resourceManagerGateway); CompletableFuture slotFuture = slotPoolGateway.allocateSlot( - new SlotRequestID(), + new SlotRequestId(), scheduledUnit, ResourceProfile.UNKNOWN, Collections.emptyList(), + true, timeout); requestSlotFuture.completeExceptionally(new FlinkException("Testing exception.")); @@ -404,15 +468,18 @@ public void testFulfillingSlotRequestsWithUnusedOfferedSlots() throws Exception resourceManagerGateway.setRequestSlotConsumer( (SlotRequest slotRequest) -> allocationIdFuture.complete(slotRequest.getAllocationId())); - final SlotRequestID slotRequestId1 = new SlotRequestID(); - final SlotRequestID slotRequestId2 = new SlotRequestID(); + final SlotRequestId slotRequestId1 = new SlotRequestId(); + final SlotRequestId slotRequestId2 = new SlotRequestId(); try { slotPool.start(jobMasterId, jobMasterAddress); final SlotPoolGateway slotPoolGateway = slotPool.getSelfGateway(SlotPoolGateway.class); - final ScheduledUnit scheduledUnit = new ScheduledUnit(mock(Execution.class)); + final ScheduledUnit scheduledUnit = new ScheduledUnit( + new JobVertexID(), + null, + null); slotPoolGateway.connectToResourceManager(resourceManagerGateway); @@ -421,6 +488,7 @@ public void testFulfillingSlotRequestsWithUnusedOfferedSlots() throws Exception scheduledUnit, ResourceProfile.UNKNOWN, Collections.emptyList(), + true, timeout); // wait for the first slot request @@ -431,16 +499,19 @@ public void testFulfillingSlotRequestsWithUnusedOfferedSlots() throws Exception scheduledUnit, ResourceProfile.UNKNOWN, Collections.emptyList(), + true, timeout); - slotPoolGateway.cancelSlotRequest(slotRequestId1); + slotPoolGateway.releaseSlot(slotRequestId1, null, null); try { // this should fail with a CancellationException slotFuture1.get(); fail("The first slot future should have failed because it was cancelled."); } catch (ExecutionException ee) { - 
assertTrue(ExceptionUtils.stripExecutionException(ee) instanceof CancellationException); + // expected + assertTrue(ExceptionUtils.stripExecutionException(ee) instanceof FlinkException); + } final SlotOffer slotOffer = new SlotOffer(allocationId, 0, ResourceProfile.UNKNOWN); diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManagerTest.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManagerTest.java new file mode 100644 index 0000000000000..c355f3839468a --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManagerTest.java @@ -0,0 +1,519 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.runtime.clusterframework.types.AllocationID; +import org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway; +import org.apache.flink.runtime.instance.SimpleSlotContext; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmanager.scheduler.Locality; +import org.apache.flink.runtime.jobmanager.slots.DummySlotOwner; +import org.apache.flink.runtime.jobmaster.LogicalSlot; +import org.apache.flink.runtime.jobmaster.SlotContext; +import org.apache.flink.runtime.jobmaster.SlotRequestId; +import org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation; +import org.apache.flink.util.AbstractID; +import org.apache.flink.util.FlinkException; +import org.apache.flink.util.TestLogger; + +import org.junit.Test; + +import java.util.Collections; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Test cases for the {@link SlotSharingManager}. 
+ */ +public class SlotSharingManagerTest extends TestLogger { + + private static final SlotSharingGroupId SLOT_SHARING_GROUP_ID = new SlotSharingGroupId(); + + private static final DummySlotOwner SLOT_OWNER = new DummySlotOwner(); + + @Test + public void testRootSlotCreation() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + final SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotRequestId slotRequestId = new SlotRequestId(); + SlotRequestId allocatedSlotRequestId = new SlotRequestId(); + + final SlotSharingManager.MultiTaskSlot multiTaskSlot = slotSharingManager.createRootSlot( + slotRequestId, + new CompletableFuture<>(), + allocatedSlotRequestId); + + assertEquals(slotRequestId, multiTaskSlot.getSlotRequestId()); + assertNotNull(slotSharingManager.getTaskSlot(slotRequestId)); + } + + @Test + public void testRootSlotRelease() throws ExecutionException, InterruptedException { + final CompletableFuture slotReleasedFuture = new CompletableFuture<>(); + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + allocatedSlotActions.setReleaseSlotConsumer( + tuple3 -> slotReleasedFuture.complete(tuple3.f0)); + + final SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotRequestId slotRequestId = new SlotRequestId(); + SlotRequestId allocatedSlotRequestId = new SlotRequestId(); + + CompletableFuture slotContextFuture = new CompletableFuture<>(); + + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + slotRequestId, + slotContextFuture, + allocatedSlotRequestId); + + assertTrue(slotSharingManager.contains(slotRequestId)); + + assertTrue(rootSlot.release(new FlinkException("Test exception"))); + + // check that we return the allocated slot + assertEquals(allocatedSlotRequestId, 
slotReleasedFuture.get()); + + assertFalse(slotSharingManager.contains(slotRequestId)); + } + + /** + * Tests that we can create nested slots. + */ + @Test + public void testNestedSlotCreation() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + final SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + new CompletableFuture<>(), + new SlotRequestId()); + + AbstractID singleTaskSlotGroupId = new AbstractID(); + SlotRequestId singleTaskSlotRequestId = new SlotRequestId(); + SlotSharingManager.SingleTaskSlot singleTaskSlot = rootSlot.allocateSingleTaskSlot( + singleTaskSlotRequestId, + singleTaskSlotGroupId, + Locality.LOCAL); + + AbstractID multiTaskSlotGroupId = new AbstractID(); + SlotRequestId multiTaskSlotRequestId = new SlotRequestId(); + SlotSharingManager.MultiTaskSlot multiTaskSlot = rootSlot.allocateMultiTaskSlot( + multiTaskSlotRequestId, + multiTaskSlotGroupId); + + assertTrue(Objects.equals(singleTaskSlotRequestId, singleTaskSlot.getSlotRequestId())); + assertTrue(Objects.equals(multiTaskSlotRequestId, multiTaskSlot.getSlotRequestId())); + + assertTrue(rootSlot.contains(singleTaskSlotGroupId)); + assertTrue(rootSlot.contains(multiTaskSlotGroupId)); + + assertTrue(slotSharingManager.contains(singleTaskSlotRequestId)); + assertTrue(slotSharingManager.contains(multiTaskSlotRequestId)); + } + + /** + * Tests that we can release nested slots from the leaves onwards. + */ + @Test + public void testNestedSlotRelease() throws Exception { + TestingAllocatedSlotActions testingAllocatedSlotActions = new TestingAllocatedSlotActions(); + + final CompletableFuture releasedSlotFuture = new CompletableFuture<>(); + testingAllocatedSlotActions.setReleaseSlotConsumer( + tuple3 -> releasedSlotFuture.complete(tuple3.f0)); + + final 
SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + testingAllocatedSlotActions, + SLOT_OWNER); + + SlotRequestId rootSlotRequestId = new SlotRequestId(); + SlotRequestId allocatedSlotRequestId = new SlotRequestId(); + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + rootSlotRequestId, + new CompletableFuture<>(), + allocatedSlotRequestId); + + SlotRequestId singleTaskSlotRequestId = new SlotRequestId(); + SlotSharingManager.SingleTaskSlot singleTaskSlot = rootSlot.allocateSingleTaskSlot( + singleTaskSlotRequestId, + new AbstractID(), + Locality.LOCAL); + + SlotRequestId multiTaskSlotRequestId = new SlotRequestId(); + SlotSharingManager.MultiTaskSlot multiTaskSlot = rootSlot.allocateMultiTaskSlot( + multiTaskSlotRequestId, + new AbstractID()); + + CompletableFuture singleTaskSlotFuture = singleTaskSlot.getLogicalSlotFuture(); + + assertTrue(slotSharingManager.contains(rootSlotRequestId)); + assertTrue(slotSharingManager.contains(singleTaskSlotRequestId)); + assertFalse(singleTaskSlotFuture.isDone()); + + FlinkException testException = new FlinkException("Test exception"); + assertTrue(singleTaskSlot.release(testException)); + + // check that we fail the single task slot future + assertTrue(singleTaskSlotFuture.isCompletedExceptionally()); + assertFalse(slotSharingManager.contains(singleTaskSlotRequestId)); + + // the root slot has still one child + assertTrue(slotSharingManager.contains(rootSlotRequestId)); + + assertTrue(multiTaskSlot.release(testException)); + + assertEquals(allocatedSlotRequestId, releasedSlotFuture.get()); + assertFalse(slotSharingManager.contains(rootSlotRequestId)); + assertFalse(slotSharingManager.contains(multiTaskSlotRequestId)); + + assertTrue(slotSharingManager.isEmpty()); + } + + /** + * Tests that we can release inner slots and that this triggers the slot release for all + * its children. 
+ */ + @Test + public void testInnerSlotRelease() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + final SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + new CompletableFuture<>(), + new SlotRequestId()); + + SlotSharingManager.MultiTaskSlot multiTaskSlot = rootSlot.allocateMultiTaskSlot( + new SlotRequestId(), + new AbstractID()); + + SlotSharingManager.SingleTaskSlot singleTaskSlot1 = multiTaskSlot.allocateSingleTaskSlot( + new SlotRequestId(), + new AbstractID(), + Locality.LOCAL); + + SlotSharingManager.MultiTaskSlot multiTaskSlot1 = multiTaskSlot.allocateMultiTaskSlot( + new SlotRequestId(), + new AbstractID()); + + assertTrue(slotSharingManager.contains(multiTaskSlot1.getSlotRequestId())); + assertTrue(slotSharingManager.contains(singleTaskSlot1.getSlotRequestId())); + assertTrue(slotSharingManager.contains(multiTaskSlot.getSlotRequestId())); + + multiTaskSlot.release(new FlinkException("Test exception")); + + assertFalse(slotSharingManager.contains(multiTaskSlot1.getSlotRequestId())); + assertFalse(slotSharingManager.contains(singleTaskSlot1.getSlotRequestId())); + assertFalse(slotSharingManager.contains(multiTaskSlot.getSlotRequestId())); + assertTrue(singleTaskSlot1.getLogicalSlotFuture().isCompletedExceptionally()); + } + + /** + * Tests that the logical task slot futures are completed once the slot context + * future is completed. 
+ */ + @Test + public void testSlotContextFutureCompletion() throws Exception { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + final SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + final SlotContext slotContext = new SimpleSlotContext( + new AllocationID(), + new LocalTaskManagerLocation(), + 0, + new SimpleAckingTaskManagerGateway()); + + CompletableFuture slotContextFuture = new CompletableFuture<>(); + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + slotContextFuture, + new SlotRequestId()); + + Locality locality1 = Locality.LOCAL; + SlotSharingManager.SingleTaskSlot singleTaskSlot1 = rootSlot.allocateSingleTaskSlot( + new SlotRequestId(), + new AbstractID(), + locality1); + + Locality locality2 = Locality.HOST_LOCAL; + SlotSharingManager.SingleTaskSlot singleTaskSlot2 = rootSlot.allocateSingleTaskSlot( + new SlotRequestId(), + new AbstractID(), + locality2); + + CompletableFuture logicalSlotFuture1 = singleTaskSlot1.getLogicalSlotFuture(); + CompletableFuture logicalSlotFuture2 = singleTaskSlot2.getLogicalSlotFuture(); + assertFalse(logicalSlotFuture1.isDone()); + assertFalse(logicalSlotFuture2.isDone()); + + slotContextFuture.complete(slotContext); + + assertTrue(logicalSlotFuture1.isDone()); + assertTrue(logicalSlotFuture2.isDone()); + + final LogicalSlot logicalSlot1 = logicalSlotFuture1.get(); + final LogicalSlot logicalSlot2 = logicalSlotFuture2.get(); + + assertEquals(logicalSlot1.getAllocationId(), slotContext.getAllocationId()); + assertEquals(logicalSlot2.getAllocationId(), slotContext.getAllocationId()); + assertEquals(locality1, logicalSlot1.getLocality()); + assertEquals(locality2, logicalSlot2.getLocality()); + + Locality locality3 = Locality.NON_LOCAL; + SlotSharingManager.SingleTaskSlot singleTaskSlot3 = rootSlot.allocateSingleTaskSlot( + new SlotRequestId(), 
+ new AbstractID(), + locality3); + + CompletableFuture logicalSlotFuture3 = singleTaskSlot3.getLogicalSlotFuture(); + + assertTrue(logicalSlotFuture3.isDone()); + LogicalSlot logicalSlot3 = logicalSlotFuture3.get(); + + assertEquals(locality3, logicalSlot3.getLocality()); + assertEquals(slotContext.getAllocationId(), logicalSlot3.getAllocationId()); + } + + /** + * Tests that slot context future failures will release the root slot. + */ + @Test + public void testSlotContextFutureFailure() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + CompletableFuture slotContextFuture = new CompletableFuture<>(); + + assertTrue(slotSharingManager.isEmpty()); + + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + slotContextFuture, + new SlotRequestId()); + + SlotSharingManager.SingleTaskSlot singleTaskSlot = rootSlot.allocateSingleTaskSlot( + new SlotRequestId(), + new AbstractID(), + Locality.LOCAL); + + slotContextFuture.completeExceptionally(new FlinkException("Test exception")); + + assertTrue(singleTaskSlot.getLogicalSlotFuture().isCompletedExceptionally()); + assertTrue(slotSharingManager.isEmpty()); + assertTrue(slotSharingManager.getResolvedRootSlots().isEmpty()); + assertTrue(slotSharingManager.getUnresolvedRootSlots().isEmpty()); + } + + /** + * Tests that the root slot is moved from unresolved to resolved once the + * slot context future is successfully completed. + */ + @Test + public void testRootSlotTransition() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + CompletableFuture slotContextFuture = new CompletableFuture<>(); + 
SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + slotContextFuture, + new SlotRequestId()); + + assertTrue(slotSharingManager.getUnresolvedRootSlots().contains(rootSlot)); + assertFalse(slotSharingManager.getResolvedRootSlots().contains(rootSlot)); + + // now complete the slotContextFuture + slotContextFuture.complete( + new SimpleSlotContext( + new AllocationID(), + new LocalTaskManagerLocation(), + 0, + new SimpleAckingTaskManagerGateway())); + + assertFalse(slotSharingManager.getUnresolvedRootSlots().contains(rootSlot)); + assertTrue(slotSharingManager.getResolvedRootSlots().contains(rootSlot)); + } + + /** + * Tests that we can correctly retrieve resolved slots. + */ + @Test + public void testGetResolvedSlot() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotSharingManager.MultiTaskSlot rootSlot = slotSharingManager.createRootSlot( + new SlotRequestId(), + CompletableFuture.completedFuture( + new SimpleSlotContext( + new AllocationID(), + new LocalTaskManagerLocation(), + 0, + new SimpleAckingTaskManagerGateway())), + new SlotRequestId()); + + AbstractID groupId = new AbstractID(); + SlotSharingManager.MultiTaskSlotLocality resolvedRootSlotLocality = slotSharingManager.getResolvedRootSlot(groupId, Collections.emptyList()); + + assertNotNull(resolvedRootSlotLocality); + assertEquals(Locality.UNCONSTRAINED, resolvedRootSlotLocality.getLocality()); + assertEquals(rootSlot.getSlotRequestId(), resolvedRootSlotLocality.getMultiTaskSlot().getSlotRequestId()); + + SlotSharingManager.MultiTaskSlot resolvedRootSlot = resolvedRootSlotLocality.getMultiTaskSlot(); + + // occupy the resolved root slot + resolvedRootSlot.allocateSingleTaskSlot( + new SlotRequestId(), + groupId, + resolvedRootSlotLocality.getLocality()); + + 
SlotSharingManager.MultiTaskSlotLocality resolvedRootSlot1 = slotSharingManager.getResolvedRootSlot( + groupId, + Collections.emptyList()); + + assertNull(resolvedRootSlot1); + } + + /** + * Tests that the location preferences are honoured when looking for a resolved slot. + */ + @Test + public void testGetResolvedSlotWithLocationPreferences() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotSharingManager.MultiTaskSlot rootSlot1 = slotSharingManager.createRootSlot( + new SlotRequestId(), + CompletableFuture.completedFuture( + new SimpleSlotContext( + new AllocationID(), + new LocalTaskManagerLocation(), + 0, + new SimpleAckingTaskManagerGateway())), + new SlotRequestId()); + + LocalTaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); + SlotSharingManager.MultiTaskSlot rootSlot2 = slotSharingManager.createRootSlot( + new SlotRequestId(), + CompletableFuture.completedFuture( + new SimpleSlotContext( + new AllocationID(), + taskManagerLocation, + 0, + new SimpleAckingTaskManagerGateway())), + new SlotRequestId()); + + AbstractID groupId = new AbstractID(); + SlotSharingManager.MultiTaskSlotLocality resolvedRootSlot1 = slotSharingManager.getResolvedRootSlot(groupId, Collections.singleton(taskManagerLocation)); + assertNotNull(resolvedRootSlot1); + assertEquals(Locality.LOCAL, resolvedRootSlot1.getLocality()); + assertEquals(rootSlot2.getSlotRequestId(), resolvedRootSlot1.getMultiTaskSlot().getSlotRequestId()); + + // occupy the slot + resolvedRootSlot1.getMultiTaskSlot().allocateSingleTaskSlot( + new SlotRequestId(), + groupId, + resolvedRootSlot1.getLocality()); + + SlotSharingManager.MultiTaskSlotLocality resolvedRootSlot2 = slotSharingManager.getResolvedRootSlot(groupId, Collections.singleton(taskManagerLocation)); + + assertNotNull(resolvedRootSlot2); + 
assertNotSame(Locality.LOCAL, (resolvedRootSlot2.getLocality())); + assertEquals(rootSlot1.getSlotRequestId(), resolvedRootSlot2.getMultiTaskSlot().getSlotRequestId()); + } + + @Test + public void testGetUnresolvedSlot() { + final TestingAllocatedSlotActions allocatedSlotActions = new TestingAllocatedSlotActions(); + + SlotSharingManager slotSharingManager = new SlotSharingManager( + SLOT_SHARING_GROUP_ID, + allocatedSlotActions, + SLOT_OWNER); + + SlotSharingManager.MultiTaskSlot rootSlot1 = slotSharingManager.createRootSlot( + new SlotRequestId(), + new CompletableFuture<>(), + new SlotRequestId()); + + final AbstractID groupId = new AbstractID(); + SlotSharingManager.MultiTaskSlot unresolvedRootSlot = slotSharingManager.getUnresolvedRootSlot(groupId); + + assertNotNull(unresolvedRootSlot); + assertEquals(rootSlot1.getSlotRequestId(), unresolvedRootSlot.getSlotRequestId()); + + // occupy the unresolved slot + unresolvedRootSlot.allocateSingleTaskSlot( + new SlotRequestId(), + groupId, + Locality.UNKNOWN); + + SlotSharingManager.MultiTaskSlot unresolvedRootSlot1 = slotSharingManager.getUnresolvedRootSlot(groupId); + + // we should no longer have a free unresolved root slot + assertNull(unresolvedRootSlot1); + } +} diff --git a/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/TestingAllocatedSlotActions.java b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/TestingAllocatedSlotActions.java new file mode 100644 index 0000000000000..159d0760e0405 --- /dev/null +++ b/flink-runtime/src/test/java/org/apache/flink/runtime/jobmaster/slotpool/TestingAllocatedSlotActions.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.jobmaster.slotpool; + +import org.apache.flink.api.java.tuple.Tuple3; +import org.apache.flink.runtime.instance.SlotSharingGroupId; +import org.apache.flink.runtime.jobmaster.SlotRequestId; +import org.apache.flink.runtime.messages.Acknowledge; + +import javax.annotation.Nullable; + +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +/** + * Simple {@link AllocatedSlotActions} implementation for testing purposes. 
+ */ +public class TestingAllocatedSlotActions implements AllocatedSlotActions { + + private volatile Consumer> releaseSlotConsumer; + + public void setReleaseSlotConsumer(@Nullable Consumer> releaseSlotConsumer) { + this.releaseSlotConsumer = releaseSlotConsumer; + } + + @Override + public CompletableFuture releaseSlot(SlotRequestId slotRequestId, @Nullable SlotSharingGroupId slotSharingGroupId, @Nullable Throwable cause) { + Consumer> currentReleaseSlotConsumer = this.releaseSlotConsumer; + + if (currentReleaseSlotConsumer != null) { + currentReleaseSlotConsumer.accept(Tuple3.of(slotRequestId, slotSharingGroupId, cause)); + } + + return CompletableFuture.completedFuture(Acknowledge.get()); + } +} diff --git a/flink-tests/src/test/java/org/apache/flink/test/recovery/FastFailuresITCase.java b/flink-tests/src/test/java/org/apache/flink/test/recovery/FastFailuresITCase.java index eb596d4022b11..9a59e8863c20c 100644 --- a/flink-tests/src/test/java/org/apache/flink/test/recovery/FastFailuresITCase.java +++ b/flink-tests/src/test/java/org/apache/flink/test/recovery/FastFailuresITCase.java @@ -99,6 +99,8 @@ public void invoke(Integer value) {} catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); + } finally { + cluster.stop(); } } }