Skip to content

Commit a211ee9

Browse files
authored
KAFKA-17593; [7/N] Introduce CoordinatorExecutor (apache#17823)
This patch introduces the `CoordinatorExecutor` construct into the `CoordinatorRuntime`. It allows scheduling asynchronous tasks from within a `CoordinatorShard` while respecting the runtime semantic. It will be used to asynchronously resolve regular expressions. The `GroupCoordinatorService` uses a default `ExecutorService` with a single thread to back it at the moment. It seems that it should be sufficient. In the future, we could consider making the number of threads configurable. Reviewers: Jeff Kim <[email protected]>, Lianet Magrans <[email protected]>
1 parent a334b1b commit a211ee9

File tree

10 files changed

+801
-4
lines changed

10 files changed

+801
-4
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.kafka.coordinator.common.runtime;
18+
19+
import org.apache.kafka.common.KafkaException;
20+
21+
/**
22+
* An interface to schedule and cancel asynchronous tasks. The TaskRunnable
23+
* interface defines the tasks to be executed in the executor and the
24+
* TaskOperation defines the operation scheduled to the runtime to
25+
* process the output of the executed task.
26+
*
27+
* @param <T> The record type.
28+
*/
29+
public interface CoordinatorExecutor<T> {
30+
/**
31+
* The task's runnable.
32+
*
33+
* @param <R> The return type.
34+
*/
35+
interface TaskRunnable<R> {
36+
R run() throws Throwable;
37+
}
38+
39+
/**
40+
* The task's write operation to handle the output
41+
* of the task.
42+
*
43+
* @param <T> The record type.
44+
* @param <R> The return type of the task.
45+
*/
46+
interface TaskOperation<T, R> {
47+
CoordinatorResult<Void, T> onComplete(
48+
R result,
49+
Throwable exception
50+
) throws KafkaException;
51+
}
52+
53+
/**
54+
* Schedule an asynchronous task. Note that only one task for a given key can
55+
* be executed at the time.
56+
*
57+
* @param key The key to identify the task.
58+
* @param task The task itself.
59+
* @param operation The runtime operation to handle the output of the task.
60+
* @return True if the task was scheduled; False otherwise.
61+
*
62+
* @param <R> The return type of the task.
63+
*/
64+
<R> boolean schedule(
65+
String key,
66+
TaskRunnable<R> task,
67+
TaskOperation<T, R> operation
68+
);
69+
70+
/**
71+
* Return true if the key is associated to a task; false otherwise.
72+
*
73+
* @param key The key to identify the task.
74+
* @return A boolean indicating whether the task is scheduled or not.
75+
*/
76+
boolean isScheduled(String key);
77+
78+
/**
79+
* Cancel the given task
80+
*
81+
* @param key The key to identify the task.
82+
*/
83+
void cancel(String key);
84+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.kafka.coordinator.common.runtime;
18+
19+
import org.apache.kafka.common.TopicPartition;
20+
import org.apache.kafka.common.errors.CoordinatorLoadInProgressException;
21+
import org.apache.kafka.common.errors.NotCoordinatorException;
22+
import org.apache.kafka.common.utils.LogContext;
23+
24+
import org.slf4j.Logger;
25+
26+
import java.time.Duration;
27+
import java.util.Iterator;
28+
import java.util.Map;
29+
import java.util.concurrent.ConcurrentHashMap;
30+
import java.util.concurrent.ExecutorService;
31+
import java.util.concurrent.RejectedExecutionException;
32+
33+
public class CoordinatorExecutorImpl<S extends CoordinatorShard<U>, U> implements CoordinatorExecutor<U> {
34+
private static class TaskResult<R> {
35+
final R result;
36+
final Throwable exception;
37+
38+
TaskResult(
39+
R result,
40+
Throwable exception
41+
) {
42+
this.result = result;
43+
this.exception = exception;
44+
}
45+
}
46+
47+
private final Logger log;
48+
private final TopicPartition shard;
49+
private final CoordinatorRuntime<S, U> runtime;
50+
private final ExecutorService executor;
51+
private final Duration writeTimeout;
52+
private final Map<String, TaskRunnable<?>> tasks = new ConcurrentHashMap<>();
53+
54+
public CoordinatorExecutorImpl(
55+
LogContext logContext,
56+
TopicPartition shard,
57+
CoordinatorRuntime<S, U> runtime,
58+
ExecutorService executor,
59+
Duration writeTimeout
60+
) {
61+
this.log = logContext.logger(CoordinatorExecutorImpl.class);
62+
this.shard = shard;
63+
this.runtime = runtime;
64+
this.executor = executor;
65+
this.writeTimeout = writeTimeout;
66+
}
67+
68+
private <R> TaskResult<R> executeTask(TaskRunnable<R> task) {
69+
try {
70+
return new TaskResult<>(task.run(), null);
71+
} catch (Throwable ex) {
72+
return new TaskResult<>(null, ex);
73+
}
74+
}
75+
76+
@Override
77+
public <R> boolean schedule(
78+
String key,
79+
TaskRunnable<R> task,
80+
TaskOperation<U, R> operation
81+
) {
82+
// Put the task if the key is free. Otherwise, reject it.
83+
if (tasks.putIfAbsent(key, task) != null) return false;
84+
85+
// Submit the task.
86+
executor.submit(() -> {
87+
// If the task associated with the key is not us, it means
88+
// that the task was either replaced or cancelled. We stop.
89+
if (tasks.get(key) != task) return;
90+
91+
// Execute the task.
92+
final TaskResult<R> result = executeTask(task);
93+
94+
// Schedule the operation.
95+
runtime.scheduleWriteOperation(
96+
key,
97+
shard,
98+
writeTimeout,
99+
coordinator -> {
100+
// If the task associated with the key is not us, it means
101+
// that the task was either replaced or cancelled. We stop.
102+
if (!tasks.remove(key, task)) {
103+
throw new RejectedExecutionException(String.format("Task %s was overridden or cancelled", key));
104+
}
105+
106+
// Call the underlying write operation with the result of the task.
107+
return operation.onComplete(result.result, result.exception);
108+
}
109+
).exceptionally(exception -> {
110+
// Remove the task after a failure.
111+
tasks.remove(key, task);
112+
113+
if (exception instanceof RejectedExecutionException) {
114+
log.debug("The write event for the task {} was not executed because it was " +
115+
"cancelled or overridden.", key);
116+
} else if (exception instanceof NotCoordinatorException || exception instanceof CoordinatorLoadInProgressException) {
117+
log.debug("The write event for the task {} failed due to {}. Ignoring it because " +
118+
"the coordinator is not active.", key, exception.getMessage());
119+
} else {
120+
log.error("The write event for the task {} failed due to {}. Ignoring it. ",
121+
key, exception.getMessage());
122+
}
123+
124+
return null;
125+
});
126+
});
127+
128+
return true;
129+
}
130+
131+
@Override
132+
public boolean isScheduled(String key) {
133+
return tasks.containsKey(key);
134+
}
135+
136+
@Override
137+
public void cancel(String key) {
138+
tasks.remove(key);
139+
}
140+
141+
public void cancelAll() {
142+
Iterator<String> iterator = tasks.keySet().iterator();
143+
while (iterator.hasNext()) {
144+
iterator.remove();
145+
}
146+
}
147+
}

coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntime.java

+36-2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import java.util.OptionalInt;
6161
import java.util.concurrent.CompletableFuture;
6262
import java.util.concurrent.ConcurrentHashMap;
63+
import java.util.concurrent.ExecutorService;
6364
import java.util.concurrent.RejectedExecutionException;
6465
import java.util.concurrent.TimeUnit;
6566
import java.util.concurrent.atomic.AtomicBoolean;
@@ -118,6 +119,7 @@ public static class Builder<S extends CoordinatorShard<U>, U> {
118119
private Serializer<U> serializer;
119120
private Compression compression;
120121
private int appendLingerMs;
122+
private ExecutorService executorService;
121123

122124
public Builder<S, U> withLogPrefix(String logPrefix) {
123125
this.logPrefix = logPrefix;
@@ -189,6 +191,11 @@ public Builder<S, U> withAppendLingerMs(int appendLingerMs) {
189191
return this;
190192
}
191193

194+
public Builder<S, U> withExecutorService(ExecutorService executorService) {
195+
this.executorService = executorService;
196+
return this;
197+
}
198+
192199
public CoordinatorRuntime<S, U> build() {
193200
if (logPrefix == null)
194201
logPrefix = "";
@@ -216,6 +223,8 @@ public CoordinatorRuntime<S, U> build() {
216223
compression = Compression.NONE;
217224
if (appendLingerMs < 0)
218225
throw new IllegalArgumentException("AppendLinger must be >= 0");
226+
if (executorService == null)
227+
throw new IllegalArgumentException("ExecutorService must be set.");
219228

220229
return new CoordinatorRuntime<>(
221230
logPrefix,
@@ -231,7 +240,8 @@ public CoordinatorRuntime<S, U> build() {
231240
coordinatorMetrics,
232241
serializer,
233242
compression,
234-
appendLingerMs
243+
appendLingerMs,
244+
executorService
235245
);
236246
}
237247
}
@@ -551,6 +561,11 @@ class CoordinatorContext {
551561
*/
552562
final EventBasedCoordinatorTimer timer;
553563

564+
/**
565+
* The coordinator executor.
566+
*/
567+
final CoordinatorExecutorImpl<S, U> executor;
568+
554569
/**
555570
* The current state.
556571
*/
@@ -603,6 +618,13 @@ private CoordinatorContext(
603618
this.epoch = -1;
604619
this.deferredEventQueue = new DeferredEventQueue(logContext);
605620
this.timer = new EventBasedCoordinatorTimer(tp, logContext);
621+
this.executor = new CoordinatorExecutorImpl<>(
622+
logContext,
623+
tp,
624+
CoordinatorRuntime.this,
625+
executorService,
626+
defaultWriteTimeout
627+
);
606628
this.bufferSupplier = new BufferSupplier.GrowableBufferSupplier();
607629
}
608630

@@ -633,6 +655,7 @@ private void transitionTo(
633655
.withSnapshotRegistry(snapshotRegistry)
634656
.withTime(time)
635657
.withTimer(timer)
658+
.withExecutor(executor)
636659
.withCoordinatorMetrics(coordinatorMetrics)
637660
.withTopicPartition(tp)
638661
.build(),
@@ -714,6 +737,7 @@ private void unload() {
714737
highWatermarklistener = null;
715738
}
716739
timer.cancelAll();
740+
executor.cancelAll();
717741
deferredEventQueue.failAll(Errors.NOT_COORDINATOR.exception());
718742
failCurrentBatch(Errors.NOT_COORDINATOR.exception());
719743
if (coordinator != null) {
@@ -1899,6 +1923,12 @@ public void onHighWatermarkUpdated(
18991923
*/
19001924
private final int appendLingerMs;
19011925

1926+
/**
1927+
* The executor service used by the coordinator runtime to schedule
1928+
* asynchronous tasks.
1929+
*/
1930+
private final ExecutorService executorService;
1931+
19021932
/**
19031933
* Atomic boolean indicating whether the runtime is running.
19041934
*/
@@ -1926,6 +1956,7 @@ public void onHighWatermarkUpdated(
19261956
* @param serializer The serializer.
19271957
* @param compression The compression codec.
19281958
* @param appendLingerMs The append linger time in ms.
1959+
* @param executorService The executor service.
19291960
*/
19301961
@SuppressWarnings("checkstyle:ParameterNumber")
19311962
private CoordinatorRuntime(
@@ -1942,7 +1973,8 @@ private CoordinatorRuntime(
19421973
CoordinatorMetrics coordinatorMetrics,
19431974
Serializer<U> serializer,
19441975
Compression compression,
1945-
int appendLingerMs
1976+
int appendLingerMs,
1977+
ExecutorService executorService
19461978
) {
19471979
this.logPrefix = logPrefix;
19481980
this.logContext = logContext;
@@ -1960,6 +1992,7 @@ private CoordinatorRuntime(
19601992
this.serializer = serializer;
19611993
this.compression = compression;
19621994
this.appendLingerMs = appendLingerMs;
1995+
this.executorService = executorService;
19631996
}
19641997

19651998
/**
@@ -2423,6 +2456,7 @@ public void close() throws Exception {
24232456
}
24242457
});
24252458
coordinators.clear();
2459+
executorService.shutdown();
24262460
Utils.closeQuietly(runtimeMetrics, "runtime metrics");
24272461
log.info("Coordinator runtime closed.");
24282462
}

coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorShardBuilder.java

+11
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@ CoordinatorShardBuilder<S, U> withTimer(
7575
CoordinatorTimer<Void, U> timer
7676
);
7777

78+
/**
79+
* Sets the coordinator executor.
80+
*
81+
* @param executor The coordinator executor.
82+
*
83+
* @return The builder.
84+
*/
85+
CoordinatorShardBuilder<S, U> withExecutor(
86+
CoordinatorExecutor<U> executor
87+
);
88+
7889
/**
7990
* Sets the coordinator metrics.
8091
*

0 commit comments

Comments
 (0)