From 041aab5af0c44394bb595e1146ea2cb96732baaa Mon Sep 17 00:00:00 2001
From: "rick.parker"
Date: Mon, 16 May 2016 10:54:25 +0100
Subject: [PATCH] Serialization tokenization support for large objects (e.g.
 entries in ServiceHub)

---
 core/build.gradle                                  |  3 +
 core/src/main/kotlin/core/node/ServiceHub.kt       |  3 +-
 .../core/serialization/SerializationToken.kt       | 98 +++++++++++++++++++
 .../serialization/SerializationTokenTest.kt        | 79 +++++++++++++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 core/src/main/kotlin/core/serialization/SerializationToken.kt
 create mode 100644 core/src/test/kotlin/core/serialization/SerializationTokenTest.kt

diff --git a/core/build.gradle b/core/build.gradle
index 1f0db4ac98d..5894f309fe4 100644
--- a/core/build.gradle
+++ b/core/build.gradle
@@ -31,6 +31,9 @@ dependencies {
     // Thread safety annotations
     compile "com.google.code.findbugs:jsr305:3.0.1"
 
+    // AssertJ: fluent assertions for testing
+    testCompile "org.assertj:assertj-core:3.4.1"
+
     // SLF4J: Logging framework.
     compile "org.slf4j:slf4j-jdk14:1.7.13"
 
diff --git a/core/src/main/kotlin/core/node/ServiceHub.kt b/core/src/main/kotlin/core/node/ServiceHub.kt
index b5ead2d5e0d..ec36bb412c0 100644
--- a/core/src/main/kotlin/core/node/ServiceHub.kt
+++ b/core/src/main/kotlin/core/node/ServiceHub.kt
@@ -1,6 +1,5 @@
 package core.node
 
-import core.*
 import core.contracts.*
 import core.crypto.SecureHash
 import core.messaging.MessagingService
@@ -14,6 +13,8 @@ import java.time.Clock
  * mocked out. This class is useful to pass to chunks of pluggable code that might have need of many different kinds of
  * functionality and you don't want to hard-code which types in the interface.
  *
+ * All services exposed to protocols (the public view) need to implement [SerializeAsToken] or similar, to avoid having their full state serialized into checkpoints.
+ *
  * TODO: Split into a public (to contracts etc) and private (to node) view
  */
 interface ServiceHub {
diff --git a/core/src/main/kotlin/core/serialization/SerializationToken.kt b/core/src/main/kotlin/core/serialization/SerializationToken.kt
new file mode 100644
index 00000000000..8b50cdf64ae
--- /dev/null
+++ b/core/src/main/kotlin/core/serialization/SerializationToken.kt
@@ -0,0 +1,98 @@
+package core.serialization
+
+import com.esotericsoftware.kryo.DefaultSerializer
+import com.esotericsoftware.kryo.Kryo
+import com.esotericsoftware.kryo.KryoException
+import com.esotericsoftware.kryo.Serializer
+import com.esotericsoftware.kryo.io.Input
+import com.esotericsoftware.kryo.io.Output
+import java.lang.ref.WeakReference
+import java.util.*
+
+/**
+ * The interfaces and classes in this file allow large, singleton-style classes to
+ * mark themselves as needing conversion to some form of token representation in the serialised form,
+ * and conversion back again when deserialising.
+ *
+ * Typically these classes would be used for node services and subsystems that might become reachable from
+ * Fibers and thus sucked into serialization when they are checkpointed.
+ */
+
+/**
+ * This interface should be implemented by classes that want to substitute a token representation of themselves when
+ * they are serialized, because they have a lot of internal state that does not serialize (well).
+ *
+ * This models a similar pattern to the readReplace/writeReplace methods in Java serialization.
+ *
+ * With Kryo serialisation, these classes should also annotate themselves with @DefaultSerializer. See below.
+ *
+ */
+interface SerializeAsToken {
+    val token: SerializationToken
+}
+
+/**
+ * This represents a token in the serialized stream for an instance of a type that implements [SerializeAsToken].
+ */
+interface SerializationToken {
+    fun fromToken(): Any
+}
+
+/**
+ * A Kryo serializer for [SerializeAsToken] implementations.
+ *
+ * Annotate the [SerializeAsToken] with @DefaultSerializer(SerializeAsTokenSerializer::class)
+ */
+class SerializeAsTokenSerializer<T : SerializeAsToken> : Serializer<T>() {
+    override fun write(kryo: Kryo, output: Output, obj: T) {
+        kryo.writeClassAndObject(output, obj.token)
+    }
+
+    override fun read(kryo: Kryo, input: Input, type: Class<T>): T {
+        val token = (kryo.readClassAndObject(input) as? SerializationToken) ?: throw KryoException("Non-token read for tokenized type: ${type.name}")
+        val fromToken = token.fromToken()
+        if (type.isAssignableFrom(fromToken.javaClass)) {
+            return type.cast(fromToken)
+        } else {
+            throw KryoException("Token read did not return tokenized type: ${type.name}")
+        }
+    }
+}
+
+/**
+ * A class representing a [SerializationToken] for some object that is not serializable but can be re-created or looked up
+ * (when deserialized) via a [String] key.
+ */
+private data class SerializationStringToken(private val key: String, private val className: String) : SerializationToken {
+
+    constructor(key: String, toBeProxied: SerializeAsStringToken) : this(key, toBeProxied.javaClass.name) {
+        tokenized.put(this, WeakReference(toBeProxied))
+    }
+
+    companion object {
+        val tokenized = Collections.synchronizedMap(WeakHashMap<SerializationStringToken, WeakReference<SerializeAsStringToken>>())
+    }
+
+    override fun fromToken(): Any = tokenized.get(this)?.get() ?:
+            throw IllegalStateException("Unable to find tokenized instance of ${className} for key $key")
+}
+
+/**
+ * A base class for implementing large objects / components / services that need to serialize themselves to a string token
+ * to indicate which instance the token is a serialized form of.
+ *
+ * This class will also double check that the class is annotated for Kryo serialization. Note it does this on every
+ * instance constructed, but given this is designed to represent heavyweight services or components, this should not be significant.
+ */ +abstract class SerializeAsStringToken(val key: String) : SerializeAsToken { + + init { + // Verify we have the annotation + val annotation = javaClass.getAnnotation(DefaultSerializer::class.java) + if (annotation == null || annotation.value.java.name != SerializeAsTokenSerializer::class.java.name) { + throw IllegalStateException("${this.javaClass.name} is not annotated with @${DefaultSerializer::class.java.simpleName} set to ${SerializeAsTokenSerializer::class.java.simpleName}") + } + } + + override val token: SerializationToken = SerializationStringToken(key, this) +} \ No newline at end of file diff --git a/core/src/test/kotlin/core/serialization/SerializationTokenTest.kt b/core/src/test/kotlin/core/serialization/SerializationTokenTest.kt new file mode 100644 index 00000000000..64431bb8309 --- /dev/null +++ b/core/src/test/kotlin/core/serialization/SerializationTokenTest.kt @@ -0,0 +1,79 @@ +package core.serialization + +import com.esotericsoftware.kryo.DefaultSerializer +import org.assertj.core.api.Assertions.assertThat +import org.junit.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotEquals + +class SerializationTokenTest { + + // Large tokenizable object so we can tell from the smaller number of serialized bytes it was actually tokenized + @DefaultSerializer(SerializeAsTokenSerializer::class) + private class LargeTokenizable(size: Int) : SerializeAsStringToken(size.toString()) { + val bytes = OpaqueBytes(ByteArray(size)) + + override fun hashCode() = bytes.bits.size + + override fun equals(other: Any?) = other is LargeTokenizable && other.bytes.bits.size == this.bytes.bits.size + } + + @Test + fun `write token and read tokenizable`() { + val numBytes = 1024 + val tokenizableBefore = LargeTokenizable(numBytes) + val serializedBytes = tokenizableBefore.serialize() + assertThat(serializedBytes.size).isLessThan(numBytes) + val tokenizableAfter = serializedBytes.deserialize() + assertEquals(tokenizableBefore, tokenizableAfter) + } + + @Test + fun `check same sized tokenizable equal`() { + val tokenizableBefore = LargeTokenizable(1024) + val tokenizableAfter = LargeTokenizable(1024) + assertEquals(tokenizableBefore, tokenizableAfter) + } + + @Test + fun `check different sized tokenizable not equal`() { + val tokenizableBefore = LargeTokenizable(1024) + val tokenizableAfter = LargeTokenizable(1025) + assertNotEquals(tokenizableBefore, tokenizableAfter) + } + + @DefaultSerializer(SerializeAsTokenSerializer::class) + private class IntegerSerializeAsKeyedToken(val value: Int) : SerializeAsStringToken(value.toString()) + + @Test + fun `write and read keyed`() { + val tokenizableBefore1 = IntegerSerializeAsKeyedToken(123) + val tokenizableBefore2 = IntegerSerializeAsKeyedToken(456) + + val serializedBytes1 = tokenizableBefore1.serialize() + val tokenizableAfter1 = serializedBytes1.deserialize() + val serializedBytes2 = tokenizableBefore2.serialize() + val tokenizableAfter2 = serializedBytes2.deserialize() + + assertThat(tokenizableAfter1).isSameAs(tokenizableBefore1) + assertThat(tokenizableAfter2).isSameAs(tokenizableBefore2) + } + + @DefaultSerializer(SerializeAsTokenSerializer::class) + private class UnitSerializeAsSingletonToken : SerializeAsStringToken("Unit0") + + @Test + fun `write and read singleton`() { + val tokenizableBefore = UnitSerializeAsSingletonToken() + val serializedBytes = tokenizableBefore.serialize() + val tokenizableAfter = serializedBytes.deserialize() + assertThat(tokenizableAfter).isSameAs(tokenizableBefore) + } + + private class 
UnannotatedSerializeAsSingletonToken : SerializeAsStringToken("Unannotated0") + + @Test(expected = IllegalStateException::class) + fun `unannotated throws`() { + val tokenizableBefore = UnannotatedSerializeAsSingletonToken() + } +} \ No newline at end of file
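
---
Usage sketch (illustrative only, not part of the diff above): a node service exposed through ServiceHub would opt into this mechanism by extending SerializeAsStringToken and carrying the Kryo annotation, so a checkpoint records just the token. ExampleWalletService and demo() are hypothetical names; serialize()/deserialize() are the existing helpers in core.serialization already used by the test file.

    // Hypothetical service: name and field are for illustration only.
    @DefaultSerializer(SerializeAsTokenSerializer::class)
    class ExampleWalletService : SerializeAsStringToken("ExampleWalletService") {
        // Heavyweight state that should never be written into a protocol checkpoint.
        val cache = HashMap<String, ByteArray>()
    }

    fun demo() {
        val service = ExampleWalletService()
        // Serializing writes only the SerializationStringToken for this instance...
        val bytes = service.serialize()
        // ...and deserializing looks the original instance back up via the token's key.
        val restored: ExampleWalletService = bytes.deserialize()
        check(restored === service)
    }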