18
18
package org .apache .spark .mllib .stat
19
19
20
20
import org .apache .spark .SparkFunSuite
21
- import org .apache .spark .mllib .stat .test .{StreamingTest , StreamingTestResult , StudentTTest , WelchTTest }
21
+ import org .apache .spark .mllib .stat .test .{StreamingTest , StreamingTestResult , StudentTTest ,
22
+ WelchTTest , BinarySample }
22
23
import org .apache .spark .streaming .TestSuiteBase
23
24
import org .apache .spark .streaming .dstream .DStream
24
25
import org .apache .spark .util .StatCounter
@@ -48,7 +49,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
48
49
49
50
// setup and run the model
50
51
val ssc = setupStreams(
51
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.registerStream(inputDStream))
52
+ input, (inputDStream : DStream [BinarySample ]) => model.registerStream(inputDStream))
52
53
val outputBatches = runStreams[StreamingTestResult ](ssc, numBatches, numBatches)
53
54
54
55
assert(outputBatches.flatten.forall(res =>
@@ -75,7 +76,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
75
76
76
77
// setup and run the model
77
78
val ssc = setupStreams(
78
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.registerStream(inputDStream))
79
+ input, (inputDStream : DStream [BinarySample ]) => model.registerStream(inputDStream))
79
80
val outputBatches = runStreams[StreamingTestResult ](ssc, numBatches, numBatches)
80
81
81
82
assert(outputBatches.flatten.forall(res =>
@@ -102,7 +103,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
102
103
103
104
// setup and run the model
104
105
val ssc = setupStreams(
105
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.registerStream(inputDStream))
106
+ input, (inputDStream : DStream [BinarySample ]) => model.registerStream(inputDStream))
106
107
val outputBatches = runStreams[StreamingTestResult ](ssc, numBatches, numBatches)
107
108
108
109
@@ -130,7 +131,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
130
131
131
132
// setup and run the model
132
133
val ssc = setupStreams(
133
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.registerStream(inputDStream))
134
+ input, (inputDStream : DStream [BinarySample ]) => model.registerStream(inputDStream))
134
135
val outputBatches = runStreams[StreamingTestResult ](ssc, numBatches, numBatches)
135
136
136
137
assert(outputBatches.flatten.forall(res =>
@@ -157,7 +158,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
157
158
// setup and run the model
158
159
val ssc = setupStreams(
159
160
input,
160
- (inputDStream : DStream [( Boolean , Double ) ]) => model.summarizeByKeyAndWindow(inputDStream))
161
+ (inputDStream : DStream [BinarySample ]) => model.summarizeByKeyAndWindow(inputDStream))
161
162
val outputBatches = runStreams[(Boolean , StatCounter )](ssc, numBatches, numBatches)
162
163
val outputCounts = outputBatches.flatten.map(_._2.count)
163
164
@@ -190,7 +191,7 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
190
191
191
192
// setup and run the model
192
193
val ssc = setupStreams(
193
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.dropPeacePeriod(inputDStream))
194
+ input, (inputDStream : DStream [BinarySample ]) => model.dropPeacePeriod(inputDStream))
194
195
val outputBatches = runStreams[(Boolean , Double )](ssc, numBatches, numBatches)
195
196
196
197
assert(outputBatches.flatten.length == (numBatches - peacePeriod) * pointsPerBatch)
@@ -210,11 +211,11 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
210
211
.setPeacePeriod(0 )
211
212
212
213
val input = generateTestData(numBatches, pointsPerBatch, meanA, stdevA, meanB, stdevB, 42 )
213
- .map(batch => batch.filter(_._1 )) // only keep one test group
214
+ .map(batch => batch.filter(_.isExperiment )) // only keep one test group
214
215
215
216
// setup and run the model
216
217
val ssc = setupStreams(
217
- input, (inputDStream : DStream [( Boolean , Double ) ]) => model.registerStream(inputDStream))
218
+ input, (inputDStream : DStream [BinarySample ]) => model.registerStream(inputDStream))
218
219
val outputBatches = runStreams[StreamingTestResult ](ssc, numBatches, numBatches)
219
220
220
221
assert(outputBatches.flatten.forall(result => (result.pValue - 1.0 ).abs < 0.001 ))
@@ -228,13 +229,13 @@ class StreamingTestSuite extends SparkFunSuite with TestSuiteBase {
228
229
stdevA : Double ,
229
230
meanB : Double ,
230
231
stdevB : Double ,
231
- seed : Int ): (IndexedSeq [IndexedSeq [( Boolean , Double ) ]]) = {
232
+ seed : Int ): (IndexedSeq [IndexedSeq [BinarySample ]]) = {
232
233
val rand = new XORShiftRandom (seed)
233
234
val numTrues = pointsPerBatch / 2
234
235
val data = (0 until numBatches).map { i =>
235
- (0 until numTrues).map { idx => (true , meanA + stdevA * rand.nextGaussian())} ++
236
+ (0 until numTrues).map { idx => BinarySample (true , meanA + stdevA * rand.nextGaussian())} ++
236
237
(pointsPerBatch / 2 until pointsPerBatch).map { idx =>
237
- (false , meanB + stdevB * rand.nextGaussian())
238
+ BinarySample (false , meanB + stdevB * rand.nextGaussian())
238
239
}
239
240
}
240
241
0 commit comments