[FLINK-19286][runtime] Improve region vertex sorting performance

aghyad · Sep 23, 2020 · f89c137 · f89c137
1 parent 6045da0
commit f89c137
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 6 deletions.
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/PipelinedRegionSchedulingStrategy.java b/flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/PipelinedRegionSchedulingStrategy.java
@@ -27,8 +27,10 @@
 import org.apache.flink.runtime.scheduler.SchedulerOperations;
 import org.apache.flink.util.IterableUtils;
 
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -53,6 +55,8 @@ public class PipelinedRegionSchedulingStrategy implements SchedulingStrategy {
 
 	private final Map<IntermediateResultPartitionID, Set<SchedulingPipelinedRegion>> partitionConsumerRegions = new HashMap<>();
 
+	private final Map<SchedulingPipelinedRegion, List<ExecutionVertexID>> regionVerticesSorted = new IdentityHashMap<>();
+
 	public PipelinedRegionSchedulingStrategy(
 			final SchedulerOperations schedulerOperations,
 			final SchedulingTopology schedulingTopology) {
@@ -72,6 +76,11 @@ private void init() {
 				correlatedResultPartitions.computeIfAbsent(partition.getResultId(), rid -> new HashSet<>()).add(partition);
 			}
 		}
+
+		for (SchedulingExecutionVertex vertex : schedulingTopology.getVertices()) {
+			final SchedulingPipelinedRegion region = schedulingTopology.getPipelinedRegionOfVertex(vertex.getId());
+			regionVerticesSorted.computeIfAbsent(region, r -> new ArrayList<>()).add(vertex.getId());
+		}
 	}
 
 	@Override
@@ -127,13 +136,9 @@ private void maybeScheduleRegion(final SchedulingPipelinedRegion region) {
 
 		checkState(areRegionVerticesAllInCreatedState(region), "BUG: trying to schedule a region which is not in CREATED state");
 
-		final Set<ExecutionVertexID> verticesToSchedule = IterableUtils.toStream(region.getVertices())
-			.map(SchedulingExecutionVertex::getId)
-			.collect(Collectors.toSet());
 		final List<ExecutionVertexDeploymentOption> vertexDeploymentOptions =
-			SchedulingStrategyUtils.createExecutionVertexDeploymentOptionsInTopologicalOrder(
-				schedulingTopology,
-				verticesToSchedule,
+			SchedulingStrategyUtils.createExecutionVertexDeploymentOptions(
+				regionVerticesSorted.get(region),
 				id -> deploymentOption);
 		schedulerOperations.allocateSlotsAndDeploy(vertexDeploymentOptions);
 	}

diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/SchedulingStrategyUtils.java b/flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/SchedulingStrategyUtils.java
@@ -22,6 +22,8 @@
 import org.apache.flink.runtime.scheduler.ExecutionVertexDeploymentOption;
 import org.apache.flink.util.IterableUtils;
 
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 import java.util.Set;
 import java.util.function.Function;
@@ -61,6 +63,20 @@ static List<ExecutionVertexDeploymentOption> createExecutionVertexDeploymentOpti
 			.collect(Collectors.toList());
 	}
 
+	static List<ExecutionVertexDeploymentOption> createExecutionVertexDeploymentOptions(
+			final Collection<ExecutionVertexID> verticesToDeploy,
+			final Function<ExecutionVertexID, DeploymentOption> deploymentOptionRetriever) {
+		// Builds one ExecutionVertexDeploymentOption per given vertex id, keeping the collection's iteration order (no re-sorting here).
+		final List<ExecutionVertexDeploymentOption> deploymentOptions = new ArrayList<>(verticesToDeploy.size()); // presized: exactly one entry per id
+		for (ExecutionVertexID executionVertexId : verticesToDeploy) {
+			final ExecutionVertexDeploymentOption deploymentOption = new ExecutionVertexDeploymentOption(
+				executionVertexId,
+				deploymentOptionRetriever.apply(executionVertexId)); // retriever is consulted once per vertex id
+			deploymentOptions.add(deploymentOption);
+		}
+		return deploymentOptions; // the freshly created list; the input collection is only read
+	}
+
 	static List<SchedulingPipelinedRegion> sortPipelinedRegionsInTopologicalOrder(
 			final SchedulingTopology topology,
 			final Set<SchedulingPipelinedRegion> regions) {