Merge pull request apache#8360: [BEAM-7028] Add Combine Java load tests

lgajowy authored May 8, 2019
2 parents 0edab50 + 5451256 commit d537bd0

Showing 2 changed files with 229 additions and 6 deletions.
216 changes: 216 additions & 0 deletions .test-infra/jenkins/job_LoadTests_Combine_Java.groovy
@@ -0,0 +1,216 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import CommonJobProperties as commonJobProperties
import CommonTestProperties
import LoadTestsBuilder as loadTestsBuilder
import PhraseTriggeringPostCommitBuilder
import CronJobBuilder

def commonLoadTestConfig = { jobType, isStreaming ->
  [
    [
      title        : 'Load test: 2GB of 10B records',
      itClass      : 'org.apache.beam.sdk.loadtests.CombineLoadTest',
      runner       : CommonTestProperties.Runner.DATAFLOW,
      jobProperties: [
        project             : 'apache-beam-testing',
        appName             : "load_tests_Java_Dataflow_${jobType}_Combine_1",
        tempLocation        : 'gs://temp-storage-for-perf-tests/loadtests',
        publishToBigQuery   : true,
        bigQueryDataset     : 'load_test',
        bigQueryTable       : "java_dataflow_${jobType}_Combine_1",
        sourceOptions       : """
                                {
                                  "numRecords": 200000000,
                                  "keySizeBytes": 1,
                                  "valueSizeBytes": 9
                                }
                              """.trim().replaceAll("\\s", ""),
        fanout              : 1,
        iterations          : 1,
        topCount            : 20,
        maxNumWorkers       : 5,
        numWorkers          : 5,
        autoscalingAlgorithm: "NONE",
        perKeyCombiner      : "TOP_LARGEST",
        streaming           : isStreaming
      ]
    ],
    [
      title        : 'Load test: 2GB of 100B records',
      itClass      : 'org.apache.beam.sdk.loadtests.CombineLoadTest',
      runner       : CommonTestProperties.Runner.DATAFLOW,
      jobProperties: [
        project             : 'apache-beam-testing',
        appName             : "load_tests_Java_Dataflow_${jobType}_Combine_2",
        tempLocation        : 'gs://temp-storage-for-perf-tests/loadtests',
        publishToBigQuery   : true,
        bigQueryDataset     : 'load_test',
        bigQueryTable       : "java_dataflow_${jobType}_Combine_2",
        sourceOptions       : """
                                {
                                  "numRecords": 20000000,
                                  "keySizeBytes": 10,
                                  "valueSizeBytes": 90
                                }
                              """.trim().replaceAll("\\s", ""),
        fanout              : 1,
        iterations          : 1,
        topCount            : 20,
        maxNumWorkers       : 5,
        numWorkers          : 5,
        autoscalingAlgorithm: "NONE",
        perKeyCombiner      : "TOP_LARGEST",
        streaming           : isStreaming
      ]
    ],
    [
      title        : 'Load test: 2GB of 100kB records',
      itClass      : 'org.apache.beam.sdk.loadtests.CombineLoadTest',
      runner       : CommonTestProperties.Runner.DATAFLOW,
      jobProperties: [
        project             : 'apache-beam-testing',
        appName             : "load_tests_Java_Dataflow_${jobType}_Combine_3",
        tempLocation        : 'gs://temp-storage-for-perf-tests/loadtests',
        publishToBigQuery   : true,
        bigQueryDataset     : 'load_test',
        bigQueryTable       : "java_dataflow_${jobType}_Combine_3",
        sourceOptions       : """
                                {
                                  "numRecords": 2000,
                                  "keySizeBytes": 100000,
                                  "valueSizeBytes": 900000
                                }
                              """.trim().replaceAll("\\s", ""),
        fanout              : 1,
        iterations          : 1,
        topCount            : 20,
        maxNumWorkers       : 5,
        numWorkers          : 5,
        autoscalingAlgorithm: "NONE",
        perKeyCombiner      : "TOP_LARGEST",
        streaming           : isStreaming
      ]
    ],
    [
      title        : 'Load test: fanout 4 times with 2GB 10-byte records total',
      itClass      : 'org.apache.beam.sdk.loadtests.CombineLoadTest',
      runner       : CommonTestProperties.Runner.DATAFLOW,
      jobProperties: [
        project             : 'apache-beam-testing',
        appName             : "load_tests_Java_Dataflow_${jobType}_Combine_4",
        tempLocation        : 'gs://temp-storage-for-perf-tests/loadtests',
        publishToBigQuery   : true,
        bigQueryDataset     : 'load_test',
        bigQueryTable       : "java_dataflow_${jobType}_Combine_4",
        sourceOptions       : """
                                {
                                  "numRecords": 5000000,
                                  "keySizeBytes": 10,
                                  "valueSizeBytes": 90
                                }
                              """.trim().replaceAll("\\s", ""),
        fanout              : 4,
        iterations          : 1,
        topCount            : 20,
        maxNumWorkers       : 16,
        numWorkers          : 16,
        autoscalingAlgorithm: "NONE",
        perKeyCombiner      : "TOP_LARGEST",
        streaming           : isStreaming
      ]
    ],
    [
      title        : 'Load test: fanout 8 times with 2GB 10-byte records total',
      itClass      : 'org.apache.beam.sdk.loadtests.CombineLoadTest',
      runner       : CommonTestProperties.Runner.DATAFLOW,
      jobProperties: [
        project             : 'apache-beam-testing',
        appName             : "load_tests_Java_Dataflow_${jobType}_Combine_5",
        tempLocation        : 'gs://temp-storage-for-perf-tests/loadtests',
        publishToBigQuery   : true,
        bigQueryDataset     : 'load_test',
        bigQueryTable       : "java_dataflow_${jobType}_Combine_5",
        sourceOptions       : """
                                {
                                  "numRecords": 2500000,
                                  "keySizeBytes": 10,
                                  "valueSizeBytes": 90
                                }
                              """.trim().replaceAll("\\s", ""),
        fanout              : 8,
        iterations          : 1,
        topCount            : 20,
        maxNumWorkers       : 16,
        numWorkers          : 16,
        autoscalingAlgorithm: "NONE",
        perKeyCombiner      : "TOP_LARGEST",
        streaming           : isStreaming
      ]
    ]
  ]
}


def batchLoadTestJob = { scope, triggeringContext ->
  scope.description('Runs Java Combine load tests on Dataflow runner in batch mode')
  commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 240)

  for (testConfiguration in commonLoadTestConfig('batch', false)) {
    loadTestsBuilder.loadTest(scope, testConfiguration.title, testConfiguration.runner, CommonTestProperties.SDK.JAVA, testConfiguration.jobProperties, testConfiguration.itClass, triggeringContext)
  }
}

def streamingLoadTestJob = { scope, triggeringContext ->
  scope.description('Runs Java Combine load tests on Dataflow runner in streaming mode')
  commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 240)

  for (testConfiguration in commonLoadTestConfig('streaming', true)) {
    testConfiguration.jobProperties << [inputWindowDurationSec: 1200]
    loadTestsBuilder.loadTest(scope, testConfiguration.title, testConfiguration.runner, CommonTestProperties.SDK.JAVA, testConfiguration.jobProperties, testConfiguration.itClass, triggeringContext)
  }
}

CronJobBuilder.cronJob('beam_LoadTests_Java_Combine_Dataflow_Batch', 'H 12 * * *', this) {
  batchLoadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT)
}

CronJobBuilder.cronJob('beam_LoadTests_Java_Combine_Dataflow_Streaming', 'H 12 * * *', this) {
  streamingLoadTestJob(delegate, CommonTestProperties.TriggeringContext.POST_COMMIT)
}

PhraseTriggeringPostCommitBuilder.postCommitJob(
    'beam_LoadTests_Java_Combine_Dataflow_Batch',
    'Run Load Tests Java Combine Dataflow Batch',
    'Load Tests Java Combine Dataflow Batch suite',
    this
) {
  batchLoadTestJob(delegate, CommonTestProperties.TriggeringContext.PR)
}

PhraseTriggeringPostCommitBuilder.postCommitJob(
    'beam_LoadTests_Java_Combine_Dataflow_Streaming',
    'Run Load Tests Java Combine Dataflow Streaming',
    'Load Tests Java Combine Dataflow Streaming suite',
    this
) {
  streamingLoadTestJob(delegate, CommonTestProperties.TriggeringContext.PR)
}
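
For reference, the sourceOptions in each configuration above are sized so that numRecords × (keySizeBytes + valueSizeBytes) comes to roughly 2 GB, and the fanout scenarios shrink numRecords so that the data processed across all fanout branches still totals about 2 GB, matching the test titles:

  200,000,000 records × (1 + 9) B              = 2,000,000,000 B ≈ 2 GB          (Combine_1)
  20,000,000 records  × (10 + 90) B            = 2,000,000,000 B ≈ 2 GB          (Combine_2)
  2,000 records       × (100,000 + 900,000) B  = 2,000,000,000 B ≈ 2 GB          (Combine_3)
  5,000,000 records   × 100 B × fanout 4       = 2,000,000,000 B ≈ 2 GB total    (Combine_4)
  2,500,000 records   × 100 B × fanout 8       = 2,000,000,000 B ≈ 2 GB total    (Combine_5)
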
CombineLoadTest.java (org.apache.beam.sdk.loadtests) — 13 additions & 6 deletions
@@ -67,15 +67,16 @@ public class CombineLoadTest extends LoadTest<CombineLoadTest.Options> {
 
   private static final String METRICS_NAMESPACE = "combine";
 
-  private enum CombinerType {
+  /** Enumerates per-key combiners available in the test. */
+  public enum CombinerType {
     TOP_LARGEST,
     MEAN,
     SUM,
     COUNT
   }
 
   /** Pipeline options specific for this test. */
-  interface Options extends LoadTestOptions {
+  public interface Options extends LoadTestOptions {
 
     @Description("Number consequent of ParDo operations (SyntheticSteps) to be performed.")
     @Default.Integer(1)
@@ -91,14 +92,20 @@ interface Options extends LoadTestOptions {
 
     @Description("Per key combiner type.")
     @Default.Enum("MEAN")
-    CombinerType getPerKeyCombinerType();
+    CombinerType getPerKeyCombiner();
 
-    void setPerKeyCombinerType(CombinerType combinerType);
+    void setPerKeyCombiner(CombinerType combinerType);
 
     @Description("Number of top results to combine (if applicable).")
     Integer getTopCount();
 
     void setTopCount(Integer topCount);
+
+    @Description("Number of reiterations over the values to perform.")
+    @Default.Integer(1)
+    Integer getIterations();
+
+    void setIterations(Integer iterations);
   }
 
   private CombineLoadTest(String[] args) throws IOException {
@@ -124,13 +131,13 @@ protected void loadTest() throws IOException {
     for (int i = 0; i < options.getFanout(); i++) {
       applyStepIfPresent(input, format("Step: %d", i), syntheticStep)
           .apply(format("Convert to Long: %d", i), MapElements.via(new ByteValueToLong()))
-          .apply(format("Combine: %d", i), getPerKeyCombiner(options.getPerKeyCombinerType()))
+          .apply(format("Combine: %d", i), getPerKeyCombiner(options.getPerKeyCombiner()))
           .apply(
               "Collect end time metric", ParDo.of(new TimeMonitor<>(METRICS_NAMESPACE, "runtime")));
     }
   }
 
-  private PTransform<PCollection<KV<byte[], Long>>, ? extends PCollection> getPerKeyCombiner(
+  public PTransform<PCollection<KV<byte[], Long>>, ? extends PCollection> getPerKeyCombiner(
       CombinerType combinerType) {
     switch (combinerType) {
       case MEAN:
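
The last hunk ends at the top of the switch; the remainder of getPerKeyCombiner is unchanged by this commit and therefore not shown. As a rough sketch of how such a dispatch can map CombinerType onto Beam's built-in per-key combiners (an illustration under assumptions, not the file's actual body: the class name CombinerDispatchSketch and the explicit topCount parameter are invented here, while Mean, Sum, Count and Top are the standard org.apache.beam.sdk.transforms combiners):

  // Illustrative sketch only: one plausible CombinerType -> PTransform dispatch
  // using Beam's built-in per-key combiners. The method body is an assumption,
  // not the actual implementation in CombineLoadTest.
  import org.apache.beam.sdk.transforms.Count;
  import org.apache.beam.sdk.transforms.Mean;
  import org.apache.beam.sdk.transforms.PTransform;
  import org.apache.beam.sdk.transforms.Sum;
  import org.apache.beam.sdk.transforms.Top;
  import org.apache.beam.sdk.values.KV;
  import org.apache.beam.sdk.values.PCollection;

  class CombinerDispatchSketch {

    enum CombinerType { TOP_LARGEST, MEAN, SUM, COUNT }

    static PTransform<PCollection<KV<byte[], Long>>, ? extends PCollection> perKeyCombiner(
        CombinerType combinerType, int topCount) {
      switch (combinerType) {
        case MEAN:
          return Mean.perKey();                // KV<byte[], Long> -> KV<byte[], Double>
        case SUM:
          return Sum.longsPerKey();            // KV<byte[], Long> -> KV<byte[], Long>
        case COUNT:
          return Count.perKey();               // KV<byte[], Long> -> KV<byte[], Long> (values per key)
        case TOP_LARGEST:
          return Top.largestPerKey(topCount);  // KV<byte[], Long> -> KV<byte[], List<Long>>
        default:
          throw new IllegalArgumentException("Unknown combiner type: " + combinerType);
      }
    }
  }

In the actual test, the top count presumably comes from Options.getTopCount(), which would explain why every Jenkins configuration above pairs perKeyCombiner: "TOP_LARGEST" with topCount: 20.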
