forked from apache/flink
[FLINK-8456] Add Scala API for Connected Streams with Broadcast State.
Showing 5 changed files with 293 additions and 3 deletions.
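For context, a minimal usage sketch of the keyed path that this change enables, mirroring the ITCase below. It is not part of the commit: the names BroadcastExample, RuleEnricher, ruleDescriptor, and the rule format are illustrative, and it relies on the broadcast(descriptor) and connect(broadcastStream) Scala methods added elsewhere in this change set (as exercised by the test).

// Illustrative sketch (not part of this commit): enrich a keyed event stream
// with rules that arrive on a broadcast stream.
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object BroadcastExample {

  // broadcast state that holds the rules, shared by all parallel instances
  val ruleDescriptor = new MapStateDescriptor[String, String](
    "rules", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // keyed stream of (user, action) events
    val events = env
      .fromElements(("alice", "login"), ("bob", "click"))
      .keyBy(_._1)

    // rules are broadcast to every parallel instance of the downstream operator
    val rules = env
      .fromElements("rule-1:flag-logins", "rule-2:flag-clicks")
      .broadcast(ruleDescriptor)

    // the new Scala API: connect the keyed stream with the broadcast stream
    // and apply a KeyedBroadcastProcessFunction
    val enriched: DataStream[String] = events
      .connect(rules)
      .process(new RuleEnricher)

    enriched.print()
    env.execute("broadcast state example")
  }

  class RuleEnricher
    extends KeyedBroadcastProcessFunction[String, (String, String), String, String] {

    override def processElement(
        value: (String, String),
        ctx: KeyedBroadcastProcessFunction[
          String, (String, String), String, String]#KeyedReadOnlyContext,
        out: Collector[String]): Unit = {
      // the keyed side has read-only access to the broadcast state
      out.collect(s"${value._1} -> ${value._2}")
    }

    override def processBroadcastElement(
        value: String,
        ctx: KeyedBroadcastProcessFunction[
          String, (String, String), String, String]#KeyedContext,
        out: Collector[String]): Unit = {
      // only the broadcast side may update the broadcast state
      ctx.getBroadcastState(ruleDescriptor).put(value.split(":")(0), value)
    }
  }
}

The BroadcastStateITCase added below exercises the same keyed path end to end, including event-time timers.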
...-scala/src/main/scala/org/apache/flink/streaming/api/scala/BroadcastConnectedStream.scala
81 changes: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.scala

import org.apache.flink.annotation.PublicEvolving
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.datastream.{BroadcastConnectedStream => JavaBCStream}
import org.apache.flink.streaming.api.functions.co.{BroadcastProcessFunction, KeyedBroadcastProcessFunction}

class BroadcastConnectedStream[IN1, IN2](javaStream: JavaBCStream[IN1, IN2]) {

  /**
    * Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]] and a
    * [[KeyedStream]] and applies the given [[KeyedBroadcastProcessFunction]] on them, thereby
    * creating a transformed output stream.
    *
    * @param function The [[KeyedBroadcastProcessFunction]] applied to each element in the stream.
    * @tparam KS The type of the keys in the keyed stream.
    * @tparam OUT The type of the output elements.
    * @return The transformed [[DataStream]].
    */
  @PublicEvolving
  def process[KS, OUT: TypeInformation](
      function: KeyedBroadcastProcessFunction[KS, IN1, IN2, OUT])
    : DataStream[OUT] = {

    if (function == null) {
throw new NullPointerException("KeyedBroadcastProcessFunction function must not be null.") | ||
    }

    val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]]
    asScalaStream(javaStream.process(function, outputTypeInfo))
  }

  /**
    * Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]]
    * and a non-keyed [[DataStream]] and applies the given
    * [[org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction]]
    * on them, thereby creating a transformed output stream.
    *
    * @param function The [[BroadcastProcessFunction]] applied to each element in the stream.
    * @tparam OUT The type of the output elements.
    * @return The transformed [[DataStream]].
    */
  @PublicEvolving
  def process[OUT: TypeInformation](
      function: BroadcastProcessFunction[IN1, IN2, OUT])
    : DataStream[OUT] = {

    if (function == null) {
throw new NullPointerException("BroadcastProcessFunction function must not be null.") | ||
    }

    val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]]
    asScalaStream(javaStream.process(function, outputTypeInfo))
  }

  /**
    * Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning
    * is not disabled in the [[org.apache.flink.api.common.ExecutionConfig]].
    */
  private[flink] def clean[F <: AnyRef](f: F): F = {
    new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f)
  }
}
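The ITCase below only exercises the keyed overload. As a rough sketch of the non-keyed process overload defined above, and not part of this commit: the names NonKeyedBroadcastExample, StopWordFilter, and stopWordsDescriptor are illustrative, and the ReadOnlyContext / Context inner types are assumed to match the Java BroadcastProcessFunction.

// Illustrative sketch (not part of this commit): filter a non-keyed stream
// against a set of stop words that arrives on a broadcast stream.
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object NonKeyedBroadcastExample {

  // broadcast state holding the stop words (the map is used as a set)
  val stopWordsDescriptor = new MapStateDescriptor[String, String](
    "stop-words", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val words = env.fromElements("flink", "the", "broadcast", "a")
    val stopWords = env.fromElements("the", "a").broadcast(stopWordsDescriptor)

    // non-keyed DataStream connected with a BroadcastStream
    words
      .connect(stopWords)
      .process(new StopWordFilter)
      .print()

    env.execute("non-keyed broadcast example")
  }

  class StopWordFilter extends BroadcastProcessFunction[String, String, String] {

    override def processElement(
        value: String,
        ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext,
        out: Collector[String]): Unit = {
      // forward the word only if it has not been broadcast as a stop word
      if (ctx.getBroadcastState(stopWordsDescriptor).get(value) == null) {
        out.collect(value)
      }
    }

    override def processBroadcastElement(
        value: String,
        ctx: BroadcastProcessFunction[String, String, String]#Context,
        out: Collector[String]): Unit = {
      ctx.getBroadcastState(stopWordsDescriptor).put(value, value)
    }
  }
}

Since ordering between the element stream and the broadcast stream is not guaranteed, a real job must tolerate elements that arrive before the broadcast state has been populated.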
...ming-scala/src/test/scala/org/apache/flink/streaming/api/scala/BroadcastStateITCase.scala
161 changes: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.scala

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.test.util.AbstractTestBase
import org.apache.flink.util.Collector
import org.junit.Assert.assertEquals
import org.junit.{Assert, Test}

/**
  * ITCase for the [[org.apache.flink.api.common.state.BroadcastState]].
  */
class BroadcastStateITCase extends AbstractTestBase {

  @Test
  @throws[Exception]
  def testConnectWithBroadcastTranslation(): Unit = {

    val timerTimestamp = 100000L

    val DESCRIPTOR = new MapStateDescriptor[Long, String](
      "broadcast-state",
      BasicTypeInfo.LONG_TYPE_INFO.asInstanceOf[TypeInformation[Long]],
      BasicTypeInfo.STRING_TYPE_INFO)

    val expected = Map[Long, String](
      0L -> "test:0",
      1L -> "test:1",
      2L -> "test:2",
      3L -> "test:3",
      4L -> "test:4",
      5L -> "test:5")

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val srcOne = env
      .generateSequence(0L, 5L)
      .assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks[Long]() {

        override def extractTimestamp(element: Long, previousElementTimestamp: Long): Long =
          element

        override def checkAndGetNextWatermark(lastElement: Long, extractedTimestamp: Long) =
          new Watermark(extractedTimestamp)
      })
      .keyBy((value: Long) => value)

    val srcTwo = env
      .fromCollection(expected.values.toSeq)
      .assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks[String]() {

        override def extractTimestamp(element: String, previousElementTimestamp: Long): Long =
          element.split(":")(1).toLong

        override def checkAndGetNextWatermark(lastElement: String, extractedTimestamp: Long) =
          new Watermark(extractedTimestamp)
      })

    val broadcast = srcTwo.broadcast(DESCRIPTOR)
    // the timer timestamp should be high enough to trigger the timer after all elements arrive
    val output = srcOne.connect(broadcast)
      .process(new TestBroadcastProcessFunction(timerTimestamp, expected))

    output
      .addSink(new TestSink(expected.size))
      .setParallelism(1)
    env.execute()
  }
}

class TestBroadcastProcessFunction(
    expectedTimestamp: Long,
    expectedBroadcastState: Map[Long, String])
  extends KeyedBroadcastProcessFunction[Long, Long, String, String] {

  val localDescriptor = new MapStateDescriptor[Long, String](
    "broadcast-state",
    BasicTypeInfo.LONG_TYPE_INFO.asInstanceOf[TypeInformation[Long]],
    BasicTypeInfo.STRING_TYPE_INFO)

  @throws[Exception]
  override def processElement(
      value: Long,
      ctx: KeyedBroadcastProcessFunction[Long, Long, String, String]#KeyedReadOnlyContext,
      out: Collector[String]): Unit = {

    ctx.timerService.registerEventTimeTimer(expectedTimestamp)
  }

  @throws[Exception]
  override def processBroadcastElement(
      value: String,
      ctx: KeyedBroadcastProcessFunction[Long, Long, String, String]#KeyedContext,
      out: Collector[String]): Unit = {

    val key = value.split(":")(1).toLong
    ctx.getBroadcastState(localDescriptor).put(key, value)
  }

  @throws[Exception]
  override def onTimer(
      timestamp: Long,
      ctx: KeyedBroadcastProcessFunction[Long, Long, String, String]#OnTimerContext,
      out: Collector[String]): Unit = {

    var map = Map[Long, String]()

    import scala.collection.JavaConversions._
    for (entry <- ctx.getBroadcastState(localDescriptor).immutableEntries()) {
      val v = expectedBroadcastState.get(entry.getKey).get
      assertEquals(v, entry.getValue)
      map += (entry.getKey -> entry.getValue)
    }

    Assert.assertEquals(expectedBroadcastState, map)

    out.collect(timestamp.toString)
  }
}

class TestSink(val expectedOutputCounter: Int) extends RichSinkFunction[String] {

  var outputCounter: Int = 0

  override def invoke(value: String) = {
    outputCounter = outputCounter + 1
  }

  @throws[Exception]
  override def close(): Unit = {
    super.close()

    // make sure that all the timers fired
    assertEquals(expectedOutputCounter, outputCounter)
  }
}