Skip to content

Commit

Permalink
[FLINK-12963] [state-processor] Introduce OperatorStateSpec wrapper c…
Browse files Browse the repository at this point in the history
…lass

This class is a simple container class to represent an operator state
that is either still defined by a BootstrapTransformation, i.e. new
state that has not been written out yet, or an existing OperatorState.

Introducing this class makes the code more readable than using Eithers
and Tuples, which would not have clear semantics for the user.
  • Loading branch information
tzulitai committed Jul 4, 2019
1 parent 31e827e commit 9f4e69f
Show file tree
Hide file tree
Showing 4 changed files with 156 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.state.api.output.MergeOperatorStates;
import org.apache.flink.state.api.output.SavepointOutputFormat;
import org.apache.flink.state.api.runtime.BootstrapTransformationWithID;
import org.apache.flink.state.api.runtime.metadata.ModifiableSavepointMetadata;
import org.apache.flink.util.Preconditions;

Expand Down Expand Up @@ -80,7 +79,7 @@ public <T> F withOperator(String uid, BootstrapTransformation<T> transformation)
public final void write(String path) {
final Path savepointPath = new Path(path);

List<Tuple2<OperatorID, BootstrapTransformation<?>>> newOperatorTransformations = metadata.getNewOperatorTransformations();
List<BootstrapTransformationWithID<?>> newOperatorTransformations = metadata.getNewOperators();
DataSet<OperatorState> newOperatorStates = writeOperatorStates(newOperatorTransformations, savepointPath);

List<OperatorState> existingOperators = metadata.getExistingOperators();
Expand Down Expand Up @@ -109,11 +108,13 @@ private DataSet<OperatorState> unionOperatorStates(DataSet<OperatorState> newOpe
}

private DataSet<OperatorState> writeOperatorStates(
List<Tuple2<OperatorID, BootstrapTransformation<?>>> newOperatorTransformations,
List<BootstrapTransformationWithID<?>> newOperatorStates,
Path savepointWritePath) {
return newOperatorTransformations
return newOperatorStates
.stream()
.map(transformation -> transformation.f1.writeOperatorState(transformation.f0, stateBackend, metadata, savepointWritePath))
.map(newOperatorState -> newOperatorState
.getBootstrapTransformation()
.writeOperatorState(newOperatorState.getOperatorID(), stateBackend, metadata, savepointWritePath))
.reduce(DataSet::union)
.orElseThrow(() -> new IllegalStateException("Savepoint's must contain at least one operator"));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.state.api.runtime;

import org.apache.flink.annotation.Internal;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.state.api.BootstrapTransformation;
import org.apache.flink.util.Preconditions;

/**
 * Container that represents a newly bootstrapped operator state within savepoints.
 *
 * <p>It pairs the {@link OperatorID} of the target operator with the {@link BootstrapTransformation}
 * that defines how that operator's state is bootstrapped. Both values are fixed at construction time.
 *
 * @param <T> the type parameter of the wrapped {@link BootstrapTransformation}
 */
@Internal
public class BootstrapTransformationWithID<T> {

	/** The transformation that defines how the state is bootstrapped. */
	private final BootstrapTransformation<T> bootstrapTransformation;

	/** The ID of the operator the bootstrapped state is targeted at. */
	private final OperatorID operatorID;

	/**
	 * Creates a new pairing of operator ID and bootstrap transformation.
	 *
	 * <p>Both arguments are passed through {@code Preconditions.checkNotNull}, the ID first.
	 *
	 * @param operatorID the ID of the target operator
	 * @param bootstrapTransformation the transformation that bootstraps the operator's state
	 */
	public BootstrapTransformationWithID(OperatorID operatorID, BootstrapTransformation<T> bootstrapTransformation) {
		// Validate in the same order as the parameters are declared before storing anything.
		final OperatorID checkedID = Preconditions.checkNotNull(operatorID);
		final BootstrapTransformation<T> checkedTransformation = Preconditions.checkNotNull(bootstrapTransformation);

		this.operatorID = checkedID;
		this.bootstrapTransformation = checkedTransformation;
	}

	/**
	 * @return the transformation that defines how the state is bootstrapped
	 */
	public BootstrapTransformation<T> getBootstrapTransformation() {
		return this.bootstrapTransformation;
	}

	/**
	 * @return the ID of the operator this bootstrapped state belongs to
	 */
	public OperatorID getOperatorID() {
		return this.operatorID;
	}
}
Original file line number Diff line number Diff line change
@@ -1,38 +1,35 @@
package org.apache.flink.state.api.runtime.metadata;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.checkpoint.MasterState;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.state.api.BootstrapTransformation;
import org.apache.flink.state.api.runtime.BootstrapTransformationWithID;
import org.apache.flink.state.api.runtime.OperatorIDGenerator;
import org.apache.flink.types.Either;

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Savepoint metadata that can be modified.
*/
@Internal
public class ModifiableSavepointMetadata extends SavepointMetadata {

private transient Map<OperatorID, Either<OperatorState, BootstrapTransformation<?>>> operatorStateIndex;
private transient Map<OperatorID, OperatorStateSpec> operatorStateIndex;

public ModifiableSavepointMetadata(int maxParallelism, Collection<MasterState> masterStates, Collection<OperatorState> initialStates) {
super(maxParallelism, masterStates);

this.operatorStateIndex = new HashMap<>(initialStates.size());

for (OperatorState operatorState : initialStates) {
operatorStateIndex.put(operatorState.getOperatorID(), Either.Left(operatorState));
}
initialStates.forEach(existingState -> operatorStateIndex.put(
existingState.getOperatorID(),
OperatorStateSpec.existing(existingState)));
}

/**
Expand All @@ -43,12 +40,12 @@ public ModifiableSavepointMetadata(int maxParallelism, Collection<MasterState> m
public OperatorState getOperatorState(String uid) throws IOException {
OperatorID operatorID = OperatorIDGenerator.fromUid(uid);

Either<OperatorState, BootstrapTransformation<?>> operatorState = operatorStateIndex.get(operatorID);
if (operatorState == null || operatorState.isRight()) {
OperatorStateSpec operatorState = operatorStateIndex.get(operatorID);
if (operatorState == null || operatorState.isNewStateTransformation()) {
throw new IOException("Savepoint does not contain state with operator uid " + uid);
}

return operatorState.left();
return operatorState.asExistingState();
}

public void removeOperator(String uid) {
Expand All @@ -62,7 +59,7 @@ public void addOperator(String uid, BootstrapTransformation<?> transformation) {
throw new IllegalArgumentException("The savepoint already contains uid " + uid + ". All uid's must be unique");
}

operatorStateIndex.put(id, Either.Right(transformation));
operatorStateIndex.put(id, OperatorStateSpec.newWithTransformation(new BootstrapTransformationWithID<>(id, transformation)));
}

/**
Expand All @@ -72,21 +69,20 @@ public List<OperatorState> getExistingOperators() {
return operatorStateIndex
.values()
.stream()
.filter(Either::isLeft)
.map(Either::left)
.filter(OperatorStateSpec::isExistingState)
.map(OperatorStateSpec::asExistingState)
.collect(Collectors.toList());
}

/**
* @return List of new operator states for the savepoint, represented by their target {@link OperatorID} and {@link BootstrapTransformation}.
*/
public List<Tuple2<OperatorID, BootstrapTransformation<?>>> getNewOperatorTransformations() {
Stream<Tuple2<OperatorID, BootstrapTransformation<?>>> transformations = operatorStateIndex
.entrySet()
public List<BootstrapTransformationWithID<?>> getNewOperators() {
return operatorStateIndex
.values()
.stream()
.filter(entry -> entry.getValue().isRight())
.map(entry -> Tuple2.of(entry.getKey(), entry.getValue().right()));

return transformations.collect(Collectors.toList());
.filter(OperatorStateSpec::isNewStateTransformation)
.map(OperatorStateSpec::asNewStateTransformation)
.collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.state.api.runtime.metadata;

import org.apache.flink.annotation.Internal;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.state.api.runtime.BootstrapTransformationWithID;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;

/**
 * Describes a single operator state maintained by {@link SavepointMetadata}.
 *
 * <p>A spec holds exactly one of two representations: an already existing {@link OperatorState}, or a
 * {@link BootstrapTransformationWithID} wrapping the
 * {@link org.apache.flink.state.api.BootstrapTransformation} that will be used to create the state.
 * Instances are only created through {@link #existing(OperatorState)} and
 * {@link #newWithTransformation(BootstrapTransformationWithID)}.
 */
@Internal
class OperatorStateSpec {

	/** Operator ID taken from whichever representation was supplied; used in the error messages below. */
	private final OperatorID id;

	/** The existing state, or {@code null} if this spec was created from a transformation. */
	@Nullable
	private final OperatorState existingState;

	/** The transformation that defines the new state, or {@code null} if this spec wraps an existing state. */
	@Nullable
	private final BootstrapTransformationWithID<?> newOperatorStateTransformation;

	private OperatorStateSpec(OperatorState existingState) {
		this.existingState = existingState;
		this.newOperatorStateTransformation = null;
		this.id = existingState.getOperatorID();
	}

	private OperatorStateSpec(BootstrapTransformationWithID<?> transformation) {
		this.existingState = null;
		this.newOperatorStateTransformation = transformation;
		this.id = transformation.getOperatorID();
	}

	/**
	 * Creates a spec for a state that already exists.
	 *
	 * @param existingState the existing operator state; passed through {@code Preconditions.checkNotNull}
	 * @return a spec for which {@link #isExistingState()} is {@code true}
	 */
	static OperatorStateSpec existing(OperatorState existingState) {
		final OperatorState checkedState = Preconditions.checkNotNull(existingState);
		return new OperatorStateSpec(checkedState);
	}

	/**
	 * Creates a spec for a new state that is defined by a bootstrap transformation.
	 *
	 * @param transformation the transformation paired with its target operator ID; passed through
	 *                       {@code Preconditions.checkNotNull}
	 * @return a spec for which {@link #isNewStateTransformation()} is {@code true}
	 */
	static OperatorStateSpec newWithTransformation(BootstrapTransformationWithID<?> transformation) {
		final BootstrapTransformationWithID<?> checkedTransformation = Preconditions.checkNotNull(transformation);
		return new OperatorStateSpec(checkedTransformation);
	}

	/**
	 * @return {@code true} if this spec holds an existing {@link OperatorState}
	 */
	boolean isExistingState() {
		return existingState != null;
	}

	/**
	 * @return {@code true} if this spec does not hold an existing {@link OperatorState}, i.e. it was
	 *         created from a bootstrap transformation
	 */
	boolean isNewStateTransformation() {
		return existingState == null;
	}

	/**
	 * Returns the wrapped existing state.
	 *
	 * <p>Fails the {@code Preconditions.checkState} check if this spec is not an existing state.
	 *
	 * @return the existing {@link OperatorState}
	 */
	OperatorState asExistingState() {
		Preconditions.checkState(existingState != null, "OperatorState %s is not an existing state.", id);
		return existingState;
	}

	/**
	 * Returns the wrapped bootstrap transformation, cast to the type argument chosen by the caller.
	 *
	 * <p>Fails the {@code Preconditions.checkState} check if this spec holds an existing state instead.
	 * The cast is unchecked: the stored transformation is kept with a wildcard type, so the caller is
	 * responsible for picking a compatible {@code T}.
	 *
	 * @param <T> the type argument the caller expects for the transformation
	 * @return the transformation that defines the new operator state
	 */
	@SuppressWarnings("unchecked")
	<T> BootstrapTransformationWithID<T> asNewStateTransformation() {
		Preconditions.checkState(existingState == null, "OperatorState %s is not a new state defined with BootstrapTransformation", id);
		return (BootstrapTransformationWithID<T>) newOperatorStateTransformation;
	}
}

0 comments on commit 9f4e69f

Please sign in to comment.