Skip to content

Commit

Permalink
Merge https://github.com/apache/beam into time-output-timestamp
Browse files Browse the repository at this point in the history
  • Loading branch information
xubii committed Sep 27, 2019
2 parents f72a29a + e78943c commit fd72944
Show file tree
Hide file tree
Showing 400 changed files with 4,204 additions and 3,616 deletions.
108 changes: 68 additions & 40 deletions .test-infra/jenkins/job_PerformanceTests_FileBasedIO_IT.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ def jobs = [
githubTitle : 'Java TextIO Performance Test',
githubTriggerPhrase: 'Run Java TextIO Performance Test',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'textioit_results',
numberOfRecords: '1000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'textioit_results',
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]

],
Expand All @@ -39,10 +41,12 @@ def jobs = [
githubTitle : 'Java CompressedTextIO Performance Test',
githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'compressed_textioit_results',
numberOfRecords: '1000000',
compressionType: 'GZIP'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'compressed_textioit_results',
numberOfRecords : '1000000',
compressionType : 'GZIP',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -57,7 +61,9 @@ def jobs = [
reportGcsPerformanceMetrics: 'true',
gcsPerformanceMetrics : 'true',
numberOfRecords : '1000000',
numberOfShards : '1000'
numberOfShards : '1000',
numWorkers : '5',
autoscalingAlgorithm : 'NONE'
]

],
Expand All @@ -68,9 +74,11 @@ def jobs = [
githubTitle : 'Java AvroIO Performance Test',
githubTriggerPhrase: 'Run Java AvroIO Performance Test',
pipelineOptions : [
numberOfRecords: '1000000',
bigQueryDataset: 'beam_performance',
bigQueryTable : 'avroioit_results',
numberOfRecords : '1000000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'avroioit_results',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -80,9 +88,11 @@ def jobs = [
githubTitle : 'Java TFRecordIO Performance Test',
githubTriggerPhrase: 'Run Java TFRecordIO Performance Test',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'tfrecordioit_results',
numberOfRecords: '1000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'tfrecordioit_results',
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -92,10 +102,12 @@ def jobs = [
githubTitle : 'Java XmlIOPerformance Test',
githubTriggerPhrase: 'Run Java XmlIO Performance Test',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'xmlioit_results',
numberOfRecords: '100000000',
charset : 'UTF-8'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'xmlioit_results',
numberOfRecords : '100000000',
charset : 'UTF-8',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -105,9 +117,11 @@ def jobs = [
githubTitle : 'Java ParquetIOPerformance Test',
githubTriggerPhrase: 'Run Java ParquetIO Performance Test',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'parquetioit_results',
numberOfRecords: '100000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'parquetioit_results',
numberOfRecords : '100000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -117,9 +131,11 @@ def jobs = [
githubTitle : 'Java TextIO Performance Test on HDFS',
githubTriggerPhrase: 'Run Java TextIO Performance Test HDFS',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'textioit_hdfs_results',
numberOfRecords: '1000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'textioit_hdfs_results',
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]

],
Expand All @@ -130,10 +146,12 @@ def jobs = [
githubTitle : 'Java CompressedTextIO Performance Test on HDFS',
githubTriggerPhrase: 'Run Java CompressedTextIO Performance Test HDFS',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'compressed_textioit_hdfs_results',
numberOfRecords: '1000000',
compressionType: 'GZIP'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'compressed_textioit_hdfs_results',
numberOfRecords : '1000000',
compressionType : 'GZIP',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -148,7 +166,9 @@ def jobs = [
reportGcsPerformanceMetrics: 'true',
gcsPerformanceMetrics : 'true',
numberOfRecords : '1000000',
numberOfShards : '1000'
numberOfShards : '1000',
numWorkers : '5',
autoscalingAlgorithm : 'NONE'
]

],
Expand All @@ -159,9 +179,11 @@ def jobs = [
githubTitle : 'Java AvroIO Performance Test on HDFS',
githubTriggerPhrase: 'Run Java AvroIO Performance Test HDFS',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'avroioit_hdfs_results',
numberOfRecords: '1000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'avroioit_hdfs_results',
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -171,7 +193,9 @@ def jobs = [
githubTitle : 'Java TFRecordIO Performance Test on HDFS',
githubTriggerPhrase: 'Run Java TFRecordIO Performance Test HDFS',
pipelineOptions : [
numberOfRecords: '1000000'
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -181,10 +205,12 @@ def jobs = [
githubTitle : 'Java XmlIOPerformance Test on HDFS',
githubTriggerPhrase: 'Run Java XmlIO Performance Test HDFS',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'xmlioit_hdfs_results',
numberOfRecords: '100000',
charset : 'UTF-8'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'xmlioit_hdfs_results',
numberOfRecords : '100000',
charset : 'UTF-8',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
],
[
Expand All @@ -194,9 +220,11 @@ def jobs = [
githubTitle : 'Java ParquetIOPerformance Test on HDFS',
githubTriggerPhrase: 'Run Java ParquetIO Performance Test HDFS',
pipelineOptions : [
bigQueryDataset: 'beam_performance',
bigQueryTable : 'parquetioit_hdfs_results',
numberOfRecords: '1000000'
bigQueryDataset : 'beam_performance',
bigQueryTable : 'parquetioit_hdfs_results',
numberOfRecords : '1000000',
numWorkers : '5',
autoscalingAlgorithm: 'NONE'
]
]
]
Expand Down
26 changes: 14 additions & 12 deletions .test-infra/jenkins/job_PerformanceTests_HadoopFormat.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,20 @@ job(jobName) {
k8s.loadBalancerIP("postgres-for-dev", postgresHostName)

Map pipelineOptions = [
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
runner : 'DataflowRunner',
numberOfRecords : '600000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'hadoopformatioit_results',
postgresUsername : 'postgres',
postgresPassword : 'uuinkks',
postgresDatabaseName: 'postgres',
postgresServerName : "\$${postgresHostName}",
postgresSsl : false,
postgresPort : '5432',
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
runner : 'DataflowRunner',
numberOfRecords : '600000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'hadoopformatioit_results',
postgresUsername : 'postgres',
postgresPassword : 'uuinkks',
postgresDatabaseName : 'postgres',
postgresServerName : "\$${postgresHostName}",
postgresSsl : false,
postgresPort : '5432',
numWorkers : '5',
autoscalingAlgorithm : 'NONE'
]

steps {
Expand Down
26 changes: 14 additions & 12 deletions .test-infra/jenkins/job_PerformanceTests_JDBC.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,20 @@ job(jobName) {
k8s.loadBalancerIP("postgres-for-dev", postgresHostName)

Map pipelineOptions = [
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
runner : 'DataflowRunner',
numberOfRecords : '5000000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'jdbcioit_results',
postgresUsername : 'postgres',
postgresPassword : 'uuinkks',
postgresDatabaseName: 'postgres',
postgresServerName : "\$${postgresHostName}",
postgresSsl : false,
postgresPort : '5432'
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
runner : 'DataflowRunner',
numberOfRecords : '5000000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'jdbcioit_results',
postgresUsername : 'postgres',
postgresPassword : 'uuinkks',
postgresDatabaseName : 'postgres',
postgresServerName : "\$${postgresHostName}",
postgresSsl : false,
postgresPort : '5432',
autoscalingAlgorithm : 'NONE',
numWorkers : '5'
]

steps {
Expand Down
20 changes: 11 additions & 9 deletions .test-infra/jenkins/job_PerformanceTests_MongoDBIO_IT.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ job(jobName) {
k8s.loadBalancerIP("mongo-load-balancer-service", mongoHostName)

Map pipelineOptions = [
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
numberOfRecords: '10000000',
bigQueryDataset: 'beam_performance',
bigQueryTable : 'mongodbioit_results',
mongoDBDatabaseName: 'beam',
mongoDBHostName: "\$${mongoHostName}",
mongoDBPort: 27017,
runner: 'DataflowRunner'
tempRoot : 'gs://temp-storage-for-perf-tests',
project : 'apache-beam-testing',
numberOfRecords : '10000000',
bigQueryDataset : 'beam_performance',
bigQueryTable : 'mongodbioit_results',
mongoDBDatabaseName : 'beam',
mongoDBHostName : "\$${mongoHostName}",
mongoDBPort : 27017,
runner : 'DataflowRunner',
autoscalingAlgorithm: 'NONE',
numWorkers : '5'
]

steps {
Expand Down
31 changes: 23 additions & 8 deletions .test-infra/metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,30 @@
under the License.
-->
# BeamMonitoring
This folder contains resources required to deploy the Beam community metrics
stack.
This folder contains resources required to deploy the Beam metrics stack.
There are two types of metrics in the Beam project:
* Community metrics
* Metrics published by tests (IO Performance tests, Load tests and Nexmark tests)

[Beam community dashboard is available here.](https://s.apache.org/beam-community-metrics)
Both types of metrics are presented in a [Grafana dashboard](https://s.apache.org/beam-community-metrics).

Whole stack can be deployed on your local machine as well.
## Community metrics

This includes
* Python scripts for ingesting data from sources (Jenkins, JIRA,
GitHub)
* Postgres analytics database
* [Grafana](https://grafana.com) dashboarding UI

## Test metrics
Beam uses Prometheus to store metrics published by tests running on Jenkins.

The Prometheus stack consists of the following components:
* the main Prometheus server
* Alertmanager
* Pushgateway

Both stacks can be deployed on your local machine.
All components run within Docker containers. These are composed together via
docker-compose for local hosting, and Kubernetes for the production instance on
GCP.
Expand Down Expand Up @@ -90,17 +101,21 @@ After running these commands, you can access the services running on your local
machine:

* Grafana: http://localhost:3000
* Postgres DB: localhost:5432
* Postgres DB: localhost:5432 (PostgreSQL connection endpoint, not an HTTP URL)
* Prometheus: http://localhost:9090
* Pushgateway: http://localhost:9091
* Alertmanager: http://localhost:9093

If you're deploying for the first time on your machine, follow the wiki instructions
on how to manually [configure
Grafana](https://cwiki.apache.org/confluence/display/BEAM/Community+Metrics#CommunityMetrics-GrafanaUI).

Grafana and Postgres containers persist data to Docker volumes, which will be
Grafana, Postgres and Prometheus containers persist data to Docker volumes, which will be
restored on subsequent runs. To start from a clean state, you must also wipe out
these volumes. (List volumes via `docker volume ls`)

## Kubernetes setup

Kubernetes deployment instructions are maintained in the
[wiki](https://cwiki.apache.org/confluence/display/BEAM/Community+Metrics).
Kubernetes deployment instructions are maintained in the wiki:
* [Community metrics](https://cwiki.apache.org/confluence/display/BEAM/Community+Metrics)
* [Test metrics]() <!-- TODO(BEAM-8130): add a link to instructions -->
26 changes: 26 additions & 0 deletions .test-infra/metrics/apply_configmaps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Recreates the config maps used by the Prometheus deployment: any existing
# prometheus-config / alertmanager-config map is deleted and a fresh one is
# created from the local configuration sources.
#
# The --from-file paths below are relative, so this script must be run from
# the directory that contains the prometheus/ folder.

set -euxo pipefail

PROMETHEUS_CONFIG_SOURCE=prometheus/prometheus/config
ALERTMANAGER_CONFIG_SOURCE=prometheus/alertmanager/config

# Validate every source BEFORE deleting anything. Without this check, running
# the script from the wrong working directory would delete the live config
# maps and then abort on `kubectl create` (because of `set -e`), leaving the
# cluster with no config maps at all.
for config_source in "${PROMETHEUS_CONFIG_SOURCE}" "${ALERTMANAGER_CONFIG_SOURCE}"; do
  if [[ ! -e "${config_source}" ]]; then
    echo "Config source not found: ${config_source}" \
         "(run this script from the directory containing prometheus/)" >&2
    exit 1
  fi
done

# Drop the old maps; --ignore-not-found keeps a first-time deployment from failing.
kubectl delete configmap prometheus-config --ignore-not-found=true
kubectl delete configmap alertmanager-config --ignore-not-found=true

# Recreate both maps from the validated local sources.
kubectl create configmap prometheus-config --from-file="${PROMETHEUS_CONFIG_SOURCE}"
kubectl create configmap alertmanager-config --from-file="${ALERTMANAGER_CONFIG_SOURCE}"
Loading

0 comments on commit fd72944

Please sign in to comment.