diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 86e2c8919da9..ec7314cfbd82 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,12 +14,12 @@ See the [Contributor Guide](https://beam.apache.org/contribute) for more tips on Post-Commit Tests Status (on master branch) ------------------------------------------------------------------------------------------------ -Lang | SDK | Apex | Dataflow | Flink | Samza | Spark ---- | --- | --- | --- | --- | --- | --- +Lang | SDK | Dataflow | Flink | Samza | Spark +--- | --- | --- | --- | --- | --- -Go | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) -Java | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/) -Python | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/) -XLang | --- | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/) | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/) +Go | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) +Java | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_Java11/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming/lastCompletedBuild/) +Python | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/) +XLang | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/) Pre-Commit Tests Status (on master branch) ------------------------------------------------------------------------------------------------ diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml index 933f4f6d0212..20db446faa90 100644 --- a/.github/autolabeler.yml +++ b/.github/autolabeler.yml @@ -74,7 +74,6 @@ io: ["sdks/go/pkg/beam/io/*", "sdks/java/io/*", "sdks/python/apache_beam/io/*"] # Runners "runners": ["runners/*", "sdks/go/pkg/beam/runners/*", "sdks/python/runners/*"] -"apex": ["runners/apex/*"] "core": ["runners/core-construction-java/*", "runners/core-java/*"] "dataflow": ["runners/google-cloud-dataflow-java/*", "sdks/go/pkg/beam/runners/dataflow/*", "sdks/python/runners/dataflow/*"] "direct": ["runners/direct-java/*", "sdks/go/pkg/beam/runners/direct/*", "sdks/python/runners/direct/*"] diff --git a/.test-infra/jenkins/README.md b/.test-infra/jenkins/README.md index 08876a5d5f78..2f758384a1f2 100644 --- a/.test-infra/jenkins/README.md +++ b/.test-infra/jenkins/README.md @@ -66,7 +66,6 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11 | 
[cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11_PR/) | `Run Dataflow ValidatesRunner Java 11` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java11) | | beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_Java11 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_Java11/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_Java11_PR/) | `Run Dataflow PortabilityApi ValidatesRunner Java 11` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_Java11/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_Java11) | | beam_PostCommit_Java_ValidatesRunner_Direct_Java11 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Direct_Java11), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Direct_Java11_PR) | `Run Direct ValidatesRunner Java 11` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Direct_Java11/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Direct_Java11) | -| beam_PostCommit_Java_ValidatesRunner_Apex | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex_PR/) | `Run Apex ValidatesRunner` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex) | | beam_PostCommit_Java_ValidatesRunner_Dataflow | 
[cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow_PR/) | `Run Dataflow ValidatesRunner` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow) | | beam_PostCommit_Java_ValidatesRunner_Flink | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink_PR/) | `Run Flink ValidatesRunner` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink) | | beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow_PR/) | `Run Dataflow PortabilityApi ValidatesRunner` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_PortabilityApi_Dataflow) | diff --git a/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_Apex.groovy b/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_Apex.groovy deleted file mode 100644 index 3cbf3966afec..000000000000 --- a/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_Apex.groovy +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import CommonJobProperties as commonJobProperties -import PostcommitJobBuilder - -// This job runs the suite of ValidatesRunner tests against the Apex runner. -PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_ValidatesRunner_Apex', - 'Run Apex ValidatesRunner', 'Apache Apex Runner ValidatesRunner Tests', this) { - description('Runs the ValidatesRunner suite on the Apex runner.') - - // Set common parameters. - commonJobProperties.setTopLevelMainJobProperties(delegate) - previousNames(/beam_PostCommit_Java_ValidatesRunner_Apex_Gradle/) - - // Publish all test results to Jenkins - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } - - // Gradle goals for this job. 
- steps { - gradle { - rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(':runners:apex:validatesRunner') - commonJobProperties.setGradleSwitches(delegate) - } - } -} diff --git a/README.md b/README.md index 24ac4901133d..6c4e1e23e161 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ # Apache Beam -[Apache Beam](http://beam.apache.org/) is a unified model for defining both batch and streaming data-parallel processing pipelines, as well as a set of language-specific SDKs for constructing pipelines and Runners for executing them on distributed processing backends, including [Apache Apex](http://apex.apache.org/), [Apache Flink](http://flink.apache.org/), [Apache Spark](http://spark.apache.org/), [Google Cloud Dataflow](http://cloud.google.com/dataflow/) and [Hazelcast Jet](https://jet.hazelcast.org/). +[Apache Beam](http://beam.apache.org/) is a unified model for defining both batch and streaming data-parallel processing pipelines, as well as a set of language-specific SDKs for constructing pipelines and Runners for executing them on distributed processing backends, including [Apache Flink](http://flink.apache.org/), [Apache Spark](http://spark.apache.org/), [Google Cloud Dataflow](http://cloud.google.com/dataflow/) and [Hazelcast Jet](https://jet.hazelcast.org/). 
## Status @@ -32,12 +32,12 @@ ### Post-commit tests status (on master branch) -Lang | SDK | Apex | Dataflow | Flink | Samza | Spark ---- | --- | --- | --- | --- | --- | --- -Go | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) -Java | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Apex/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) -Python | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/) -XLang | --- | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/) +Lang | SDK | Dataflow | Flink | Samza | Spark +--- | --- | --- | --- | --- | --- +Go | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Go_VR_Spark/lastCompletedBuild/) +Java | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Dataflow/lastCompletedBuild/) | [![Build 
Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Flink/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Batch/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Flink_Streaming/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Samza/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_ValidatesRunner_Spark/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/lastCompletedBuild/) +Python | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python36/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python37/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_VR_Dataflow_V2/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Py_ValCont/lastCompletedBuild/) | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python2_PVR_Flink_Cron/lastCompletedBuild/)
[![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python35_VR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/lastCompletedBuild/) +XLang | --- | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Flink/lastCompletedBuild/) | --- | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_XVR_Spark/lastCompletedBuild/) ## Overview @@ -73,7 +73,6 @@ Have ideas for new SDKs or DSLs? See the [JIRA](https://issues.apache.org/jira/i Beam supports executing programs on multiple distributed processing backends through PipelineRunners. Currently, the following PipelineRunners are available: - The `DirectRunner` runs the pipeline on your local machine. -- The `ApexRunner` runs the pipeline on an Apache Hadoop YARN cluster (or in embedded mode). - The `DataflowRunner` submits the pipeline to the [Google Cloud Dataflow](http://cloud.google.com/dataflow/). - The `FlinkRunner` runs the pipeline on an Apache Flink cluster. The code has been donated from [dataArtisans/flink-dataflow](https://github.com/dataArtisans/flink-dataflow) and is now part of Beam. - The `SparkRunner` runs the pipeline on an Apache Spark cluster. The code has been donated from [cloudera/spark-dataflow](https://github.com/cloudera/spark-dataflow) and is now part of Beam. 
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 0bbd8325d30a..114c72afa8f0 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -178,7 +178,7 @@ class BeamModulePlugin implements Plugin { // and also for the script name, ${type}-java-${runner}.toLowerCase(). String type - // runner [Direct, Dataflow, Spark, Flink, FlinkLocal, Apex] + // runner [Direct, Dataflow, Spark, Flink, FlinkLocal] String runner // gcpProject sets the gcpProject argument when executing examples. @@ -378,8 +378,6 @@ class BeamModulePlugin implements Plugin { // These versions are defined here because they represent // a dependency version which should match across multiple // Maven artifacts. - def apex_core_version = "3.7.0" - def apex_malhar_version = "3.4.0" def aws_java_sdk_version = "1.11.718" def aws_java_sdk2_version = "2.10.61" def cassandra_driver_version = "3.8.0" @@ -426,8 +424,6 @@ class BeamModulePlugin implements Plugin { activemq_mqtt : "org.apache.activemq:activemq-mqtt:5.13.1", antlr : "org.antlr:antlr4:4.7", antlr_runtime : "org.antlr:antlr4-runtime:4.7", - apex_common : "org.apache.apex:apex-common:$apex_core_version", - apex_engine : "org.apache.apex:apex-engine:$apex_core_version", args4j : "args4j:args4j:2.33", avro : "org.apache.avro:avro:1.8.2", avro_tests : "org.apache.avro:avro:1.8.2:tests", @@ -528,7 +524,6 @@ class BeamModulePlugin implements Plugin { junit : "junit:junit:4.13-beta-3", kafka : "org.apache.kafka:kafka_2.11:$kafka_version", kafka_clients : "org.apache.kafka:kafka-clients:$kafka_version", - malhar_library : "org.apache.apex:malhar-library:$apex_malhar_version", mockito_core : "org.mockito:mockito-core:3.0.0", nemo_compiler_frontend_beam : "org.apache.nemo:nemo-compiler-frontend-beam:$nemo_version", netty_handler : 
"io.netty:netty-handler:$netty_version", diff --git a/examples/java/build.gradle b/examples/java/build.gradle index 5b08348aa5c8..222a06b39ce7 100644 --- a/examples/java/build.gradle +++ b/examples/java/build.gradle @@ -32,7 +32,6 @@ artifact includes all Apache Beam Java SDK examples.""" * Some runners are run from separate projects, see the preCommit task below * for details. */ -// TODO: Add apexRunner - https://issues.apache.org/jira/browse/BEAM-3583 def preCommitRunners = ["directRunner", "flinkRunner", "sparkRunner"] for (String runner : preCommitRunners) { configurations.create(runner + "PreCommit") @@ -75,8 +74,6 @@ dependencies { delegate.add(runner + "PreCommit", project(":examples:java")) delegate.add(runner + "PreCommit", project(path: ":examples:java", configuration: "testRuntime")) } - // https://issues.apache.org/jira/browse/BEAM-3583 - // apexRunnerPreCommit project(":runners:apex") directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow") flinkRunnerPreCommit project(":runners:flink:1.10") // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath @@ -93,7 +90,6 @@ dependencies { * of integration tests for WordCount and WindowedWordCount. */ def preCommitRunnerClass = [ - apexRunner: "org.apache.beam.runners.apex.TestApexRunner", directRunner: "org.apache.beam.runners.direct.DirectRunner", flinkRunner: "org.apache.beam.runners.flink.TestFlinkRunner", sparkRunner: "org.apache.beam.runners.spark.TestSparkRunner", diff --git a/examples/kotlin/build.gradle b/examples/kotlin/build.gradle index 6d38c64dc480..847cd808c346 100644 --- a/examples/kotlin/build.gradle +++ b/examples/kotlin/build.gradle @@ -35,7 +35,6 @@ artifact includes all Apache Beam Kotlin SDK examples.""" * Some runners are run from separate projects, see the preCommit task below * for details. 
*/ -// TODO: Add apexRunner - https://issues.apache.org/jira/browse/BEAM-3583 def preCommitRunners = ["directRunner", "flinkRunner", "sparkRunner"] for (String runner : preCommitRunners) { configurations.create(runner + "PreCommit") @@ -78,8 +77,6 @@ dependencies { delegate.add(runner + "PreCommit", project(":examples:kotlin")) delegate.add(runner + "PreCommit", project(path: ":examples:kotlin", configuration: "testRuntime")) } - // https://issues.apache.org/jira/browse/BEAM-3583 - // apexRunnerPreCommit project(":runners:apex") directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow") flinkRunnerPreCommit project(":runners:flink:1.10") // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath @@ -97,7 +94,6 @@ dependencies { * of integration tests for WordCount and WindowedWordCount. */ def preCommitRunnerClass = [ - apexRunner: "org.apache.beam.runners.apex.TestApexRunner", directRunner: "org.apache.beam.runners.direct.DirectRunner", flinkRunner: "org.apache.beam.runners.flink.TestFlinkRunner", sparkRunner: "org.apache.beam.runners.spark.TestSparkRunner", diff --git a/learning/katas/go/Introduction/Hello Beam/Hello Beam/task.md b/learning/katas/go/Introduction/Hello Beam/Hello Beam/task.md index beaa497e5797..c7cbc5345546 100644 --- a/learning/katas/go/Introduction/Hello Beam/Hello Beam/task.md +++ b/learning/katas/go/Introduction/Hello Beam/Hello Beam/task.md @@ -22,7 +22,7 @@ Apache Beam is an open source, unified model for defining both batch and streaming data-parallel processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the pipeline. The pipeline is then executed by one of Beam’s supported distributed processing -back-ends, which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow. +back-ends, which include Apache Flink, Apache Spark, and Google Cloud Dataflow. 
Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the problem can be decomposed into many smaller bundles of data that can be processed independently diff --git a/learning/katas/kotlin/Introduction/Hello Beam/Hello Beam/task.md b/learning/katas/kotlin/Introduction/Hello Beam/Hello Beam/task.md index c1ef872d6d7a..7ef5bd391c55 100644 --- a/learning/katas/kotlin/Introduction/Hello Beam/Hello Beam/task.md +++ b/learning/katas/kotlin/Introduction/Hello Beam/Hello Beam/task.md @@ -22,7 +22,7 @@ Welcome To Apache Beam Apache Beam is an open source, unified model for defining both batch and streaming data-parallel processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the pipeline. The pipeline is then executed by one of Beam’s supported distributed processing back-ends, -which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow. +which include Apache Flink, Apache Spark, and Google Cloud Dataflow. Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the problem can be decomposed into many smaller bundles of data that can be processed independently and in diff --git a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md index b6df12c28d86..9f0c1b75bf65 100644 --- a/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md +++ b/learning/katas/python/Introduction/Hello Beam/Hello Beam/task.md @@ -22,7 +22,7 @@ Welcome To Apache Beam Apache Beam is an open source, unified model for defining both batch and streaming data-parallel processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the pipeline. The pipeline is then executed by one of Beam’s supported distributed processing back-ends, -which include Apache Apex, Apache Flink, Apache Spark, and Google Cloud Dataflow. 
+which include Apache Flink, Apache Spark, and Google Cloud Dataflow. Beam is particularly useful for Embarrassingly Parallel data processing tasks, in which the problem can be decomposed into many smaller bundles of data that can be processed independently and in diff --git a/ownership/JAVA_DEPENDENCY_OWNERS.yaml b/ownership/JAVA_DEPENDENCY_OWNERS.yaml index 73d146c71ee8..904f4404660e 100644 --- a/ownership/JAVA_DEPENDENCY_OWNERS.yaml +++ b/ownership/JAVA_DEPENDENCY_OWNERS.yaml @@ -599,21 +599,6 @@ deps: artifact: activemq-junit owners: - org.apache.apex:apex-common: - group: org.apache.apex - artifact: apex-common - owners: - - org.apache.apex:apex-engine: - group: org.apache.apex - artifact: apex-engine - owners: - - org.apache.apex:malhar-library: - group: org.apache.apex - artifact: malhar-library - owners: - org.apache.avro:avro: group: org.apache.avro artifact: avro diff --git a/release/build.gradle b/release/build.gradle index 21ab9d882d61..f4d9cfe35b85 100644 --- a/release/build.gradle +++ b/release/build.gradle @@ -32,7 +32,6 @@ task runJavaExamplesValidationTask { description = "Run the Beam quickstart across all Java runners" dependsOn ":runners:direct-java:runQuickstartJavaDirect" dependsOn ":runners:google-cloud-dataflow-java:runQuickstartJavaDataflow" - dependsOn ":runners:apex:runQuickstartJavaApex" dependsOn ":runners:spark:runQuickstartJavaSpark" dependsOn ":runners:flink:1.10:runQuickstartJavaFlinkLocal" dependsOn ":runners:direct-java:runMobileGamingJavaDirect" diff --git a/release/src/main/groovy/MobileGamingCommands.groovy b/release/src/main/groovy/MobileGamingCommands.groovy index 1042062e4cb5..cceca98bc42b 100644 --- a/release/src/main/groovy/MobileGamingCommands.groovy +++ b/release/src/main/groovy/MobileGamingCommands.groovy @@ -27,7 +27,6 @@ class MobileGamingCommands { public static final RUNNERS = [DirectRunner: "direct-runner", DataflowRunner: "dataflow-runner", SparkRunner: "spark-runner", - ApexRunner: "apex-runner", FlinkRunner: 
"flink-runner"] public static final EXECUTION_TIMEOUT_IN_MINUTES = 20 diff --git a/release/src/main/groovy/quickstart-java-apex.groovy b/release/src/main/groovy/quickstart-java-apex.groovy deleted file mode 100644 index 3e8e00521a0e..000000000000 --- a/release/src/main/groovy/quickstart-java-apex.groovy +++ /dev/null @@ -1,45 +0,0 @@ -#!groovy -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -t = new TestScripts(args) - -/* - * Run the Apex quickstart from https://beam.apache.org/get-started/quickstart-java/ - */ - -t.describe 'Run Apache Beam Java SDK Quickstart - Apex' - - t.intent 'Gets the WordCount Example Code' - QuickstartArchetype.generate(t) - - t.intent 'Runs the WordCount Code with Apex runner' - // Run the wordcount example with the apex runner - t.run """mvn compile exec:java -q \ - -Dexec.mainClass=org.apache.beam.examples.WordCount \ - -Dexec.args="--inputFile=pom.xml \ - --output=counts \ - --runner=ApexRunner" \ - -Papex-runner""" - - // Verify text from the pom.xml input file - String result = t.run "grep Foundation counts*" - t.see "Foundation: 1", result - - // Clean up - t.done() diff --git a/release/src/main/scripts/mass_comment.py b/release/src/main/scripts/mass_comment.py index 4de984c9da89..e4dff8484664 100644 --- a/release/src/main/scripts/mass_comment.py +++ b/release/src/main/scripts/mass_comment.py @@ -33,7 +33,6 @@ "Run Java PostCommit", "Run Java Flink PortableValidatesRunner Batch", "Run Java Flink PortableValidatesRunner Streaming", - "Run Apex ValidatesRunner", "Run Dataflow ValidatesRunner", "Run Flink ValidatesRunner", "Run Samza ValidatesRunner", diff --git a/release/src/main/scripts/run_rc_validation.sh b/release/src/main/scripts/run_rc_validation.sh index 54c76aef1282..a9a21796b5b5 100755 --- a/release/src/main/scripts/run_rc_validation.sh +++ b/release/src/main/scripts/run_rc_validation.sh @@ -193,18 +193,6 @@ else echo "* Skip Java quickstart with direct runner" fi -echo "[Current task] Java quickstart with Apex local runner" -if [[ "$java_quickstart_apex_local" = true ]]; then - echo "*************************************************************" - echo "* Running Java Quickstart with Apex local runner" - echo "*************************************************************" - ./gradlew :runners:apex:runQuickstartJavaApex \ - -Prepourl=${REPO_URL} \ - -Pver=${RELEASE_VER} -else - echo "* Skip Java 
quickstart with Apex local runner" -fi - echo "[Current task] Java quickstart with Flink local runner" if [[ "$java_quickstart_flink_local" = true ]]; then echo "*************************************************************" diff --git a/runners/apex/build.gradle b/runners/apex/build.gradle deleted file mode 100644 index 31749294adbb..000000000000 --- a/runners/apex/build.gradle +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import groovy.json.JsonOutput - -plugins { id 'org.apache.beam.module' } -applyJavaNature(automaticModuleName: 'org.apache.beam.runners.apex') - -description = "Apache Beam :: Runners :: Apex" - -/* - * We need to rely on manually specifying these evaluationDependsOn to ensure that - * the following projects are evaluated before we evaluate this project. This is because - * we are attempting to reference the "sourceSets.test.output" directly. 
- */ -evaluationDependsOn(":sdks:java:core") - -configurations { - validatesRunner -} - -dependencies { - compile project(path: ":model:pipeline", configuration: "shadow") - compile project(path: ":sdks:java:core", configuration: "shadow") - compile project(":runners:core-construction-java") - compile project(":runners:core-java") - compile library.java.apex_common - compile library.java.malhar_library - compile library.java.apex_engine - compile library.java.apex_engine - testCompile project(path: ":sdks:java:core", configuration: "shadowTest") - // ApexStateInternalsTest extends abstract StateInternalsTest - testCompile project(path: ":runners:core-java", configuration: "testRuntime") - testCompile library.java.hamcrest_core - testCompile library.java.junit - testCompile library.java.mockito_core - testCompile library.java.jackson_dataformat_yaml - validatesRunner project(path: ":sdks:java:core", configuration: "shadowTest") - validatesRunner project(path: ":runners:core-java", configuration: "testRuntime") - validatesRunner project(project.path) -} - -// TODO: Update this so that the generated file is added to the explicitly added instead of -// just outputting the file in the correct path. 
-task buildDependencyTree(type: DependencyReportTask) { - configurations = [project.configurations.testRuntimeClasspath] - outputFile = new File(buildDir, "classes/java/main/org/apache/beam/runners/apex/dependency-tree") - // TODO: Migrate ApexYarnLauncher to use the Gradles dependency tree output instead of Mavens - // so we don't have to try to replace the format of the file on the fly - doLast { - // Filter out lines which don't have any dependencies by looking for lines with "--- " - ant.replaceregexp(file: outputFile, match: "^((?!--- ).)*\$", replace: "", byline: true) - // Remove empty lines - ant.replaceregexp(file: outputFile, match: "\\n\\n", replace: "", flags: "gm") - // Replace strings with ":a.b.c -> x.y.z" to just be ":x.y.z" getting the used version of the dependency. - ant.replaceregexp(file: outputFile, match: ":([^:]*) -> (.*)", replace: ":\\2", byline: true) - // Remove a trailing " (*)" off the end to so there is nothing after the version identifier. - ant.replaceregexp(file: outputFile, match: " \\(\\*\\)", replace: "", byline: true) - // Add ":jar" to the maven dependency string assuming that all resource types are jars. 
- ant.replaceregexp(file: outputFile, match: "[^:]*:[^:]*", replace: "\\0:jar", byline: true) - } -} -compileJava.dependsOn buildDependencyTree - -task validatesRunnerBatch(type: Test) { - group = "Verification" - systemProperty "beamTestPipelineOptions", JsonOutput.toJson([ - "--runner=TestApexRunner", - ]) - - classpath = configurations.validatesRunner - testClassesDirs = files(project(":sdks:java:core").sourceSets.test.output.classesDirs) - useJUnit { - includeCategories 'org.apache.beam.sdk.testing.ValidatesRunner' - excludeCategories 'org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders' - excludeCategories 'org.apache.beam.sdk.testing.UsesAttemptedMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesCommittedMetrics' - excludeCategories 'org.apache.beam.sdk.testing.UsesImpulse' - excludeCategories 'org.apache.beam.sdk.testing.UsesParDoLifecycle' - excludeCategories 'org.apache.beam.sdk.testing.UsesTestStream' - excludeCategories 'org.apache.beam.sdk.testing.UsesTimersInParDo' - excludeCategories 'org.apache.beam.sdk.testing.UsesTimerMap' - excludeCategories 'org.apache.beam.sdk.testing.UsesOnWindowExpiration' - excludeCategories 'org.apache.beam.sdk.testing.UsesMetricsPusher' - excludeCategories 'org.apache.beam.sdk.testing.UsesUnboundedSplittableParDo' - excludeCategories 'org.apache.beam.sdk.testing.UsesUnboundedPCollections' - // TODO[BEAM-8304]: Support multiple side inputs with different coders. - excludeCategories 'org.apache.beam.sdk.testing.UsesSideInputsWithDifferentCoders' - excludeCategories 'org.apache.beam.sdk.testing.UsesBundleFinalizer' - } - - // apex runner is run in embedded mode. 
Increase default HeapSize - maxHeapSize = '4g' -} - -task validatesRunner { - group = "Verification" - description "Validates Apex runner" - dependsOn validatesRunnerBatch -} - -// Generates :runners:apex:runQuickstartJavaApex -createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner:'Apex') diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java deleted file mode 100644 index ba3bf53e8db6..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexPipelineOptions.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; - -/** Options that configure the Apex pipeline. 
*/ -public interface ApexPipelineOptions extends PipelineOptions { - - @Description("set unique application name for Apex runner") - void setApplicationName(String name); - - String getApplicationName(); - - @Description("execute the pipeline with embedded cluster") - void setEmbeddedExecution(boolean embedded); - - @Default.Boolean(true) - boolean isEmbeddedExecution(); - - @Description("configure embedded execution with debug friendly options") - void setEmbeddedExecutionDebugMode(boolean embeddedDebug); - - @Default.Boolean(true) - boolean isEmbeddedExecutionDebugMode(); - - @Description("output data received and emitted on ports (for debugging)") - void setTupleTracingEnabled(boolean enabled); - - @Default.Boolean(false) - boolean isTupleTracingEnabled(); - - @Description("how long the client should wait for the pipeline to run") - void setRunMillis(long runMillis); - - @Default.Long(0) - long getRunMillis(); - - @Description("configuration properties file for the Apex engine") - void setConfigFile(String name); - - @Default.String("classpath:/beam-runners-apex.properties") - String getConfigFile(); - - @Description("configure whether to perform ParDo fusion") - void setParDoFusionEnabled(boolean enabled); - - @Default.Boolean(true) - boolean isParDoFusionEnabled(); -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java deleted file mode 100644 index e728460a5ff8..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunner.java +++ /dev/null @@ -1,391 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import com.datatorrent.api.Attribute; -import com.datatorrent.api.Context.DAGContext; -import com.datatorrent.api.DAG; -import com.datatorrent.api.StreamingApplication; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.atomic.AtomicReference; -import org.apache.apex.api.EmbeddedAppLauncher; -import org.apache.apex.api.Launcher; -import org.apache.apex.api.Launcher.AppHandle; -import org.apache.apex.api.Launcher.LaunchMode; -import org.apache.beam.runners.apex.translation.ApexPipelineTranslator; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems; -import org.apache.beam.runners.core.construction.PTransformMatchers; -import org.apache.beam.runners.core.construction.PTransformReplacements; -import org.apache.beam.runners.core.construction.PrimitiveCreate; -import org.apache.beam.runners.core.construction.SingleInputOutputOverrideFactory; -import org.apache.beam.runners.core.construction.SplittableParDo; -import org.apache.beam.runners.core.construction.SplittableParDoNaiveBounded; -import org.apache.beam.runners.core.construction.UnsupportedOverrideFactory; -import org.apache.beam.sdk.Pipeline; 
-import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderRegistry; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsValidator; -import org.apache.beam.sdk.runners.AppliedPTransform; -import org.apache.beam.sdk.runners.PTransformOverride; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.View.CreatePCollectionView; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PCollectionViews; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.Files; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; - -/** - * A {@link PipelineRunner} that translates the pipeline to an Apex DAG and executes it on an Apex - * cluster. - */ -public class ApexRunner extends PipelineRunner { - - private final ApexPipelineOptions options; - public static final String CLASSPATH_SCHEME = "classpath"; - protected boolean translateOnly = false; - - /** - * TODO: this isn't thread safe and may cause issues when tests run in parallel Holds any most - * resent assertion error that was raised while processing elements. Used in the unit test driver - * in embedded mode to propagate the exception. 
- */ - public static final AtomicReference ASSERTION_ERROR = new AtomicReference<>(); - - public ApexRunner(ApexPipelineOptions options) { - this.options = options; - } - - public static ApexRunner fromOptions(PipelineOptions options) { - ApexPipelineOptions apexPipelineOptions = - PipelineOptionsValidator.validate(ApexPipelineOptions.class, options); - return new ApexRunner(apexPipelineOptions); - } - - @SuppressWarnings({"rawtypes"}) - protected List getOverrides() { - return ImmutableList.builder() - .add( - PTransformOverride.of( - PTransformMatchers.classEqualTo(Create.Values.class), - new PrimitiveCreate.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.createViewWithViewFn(PCollectionViews.IterableViewFn.class), - new StreamingViewAsIterable.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.createViewWithViewFn(PCollectionViews.ListViewFn.class), - new StreamingViewAsIterable.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.createViewWithViewFn(PCollectionViews.MapViewFn.class), - new StreamingViewAsIterable.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.createViewWithViewFn(PCollectionViews.MultimapViewFn.class), - new StreamingViewAsIterable.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.createViewWithViewFn(PCollectionViews.SingletonViewFn.class), - new StreamingWrapSingletonInList.Factory())) - .add( - PTransformOverride.of( - PTransformMatchers.splittableParDoMulti(), new SplittableParDo.OverrideFactory())) - .add( - PTransformOverride.of( - PTransformMatchers.splittableProcessKeyedBounded(), - new SplittableParDoNaiveBounded.OverrideFactory<>())) - .add( - PTransformOverride.of( - PTransformMatchers.splittableProcessKeyedUnbounded(), - new SplittableParDoViaKeyedWorkItems.OverrideFactory<>())) - // TODO: [BEAM-5360] Support @RequiresStableInput on Apex runner - .add( - PTransformOverride.of( - PTransformMatchers.requiresStableInputParDoMulti(), - 
UnsupportedOverrideFactory.withMessage( - "Apex runner currently doesn't support @RequiresStableInput annotation."))) - .build(); - } - - @Override - public ApexRunnerResult run(final Pipeline pipeline) { - pipeline.replaceAll(getOverrides()); - - final ApexPipelineTranslator translator = new ApexPipelineTranslator(options); - final AtomicReference apexDAG = new AtomicReference<>(); - final AtomicReference tempDir = new AtomicReference<>(); - - StreamingApplication apexApp = - (dag, conf) -> { - apexDAG.set(dag); - dag.setAttribute(DAGContext.APPLICATION_NAME, options.getApplicationName()); - if (options.isEmbeddedExecution()) { - // set unique path for application state to allow for parallel execution of unit tests - // (the embedded cluster would set it to a fixed location under ./target) - tempDir.set(Files.createTempDir()); - dag.setAttribute(DAGContext.APPLICATION_PATH, tempDir.get().toURI().toString()); - } - translator.translate(pipeline, dag); - }; - - Properties configProperties = new Properties(); - try { - if (options.getConfigFile() != null) { - URI configURL = new URI(options.getConfigFile()); - if (CLASSPATH_SCHEME.equals(configURL.getScheme())) { - InputStream is = this.getClass().getResourceAsStream(configURL.getPath()); - if (is != null) { - configProperties.load(is); - is.close(); - } - } else { - if (!configURL.isAbsolute()) { - // resolve as local file name - File f = new File(options.getConfigFile()); - configURL = f.toURI(); - } - try (InputStream is = configURL.toURL().openStream()) { - configProperties.load(is); - } - } - } - } catch (IOException | URISyntaxException ex) { - throw new RuntimeException("Error loading properties", ex); - } - - if (options.isEmbeddedExecution()) { - EmbeddedAppLauncher launcher = Launcher.getLauncher(LaunchMode.EMBEDDED); - Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap(); - launchAttributes.put(EmbeddedAppLauncher.RUN_ASYNC, true); - if 
(options.isEmbeddedExecutionDebugMode()) { - // turns off timeout checking for operator progress - launchAttributes.put(EmbeddedAppLauncher.HEARTBEAT_MONITORING, false); - } - Configuration conf = new Configuration(false); - ApexYarnLauncher.addProperties(conf, configProperties); - try { - if (translateOnly) { - launcher.prepareDAG(apexApp, conf); - return new ApexRunnerResult(launcher.getDAG(), null); - } - ApexRunner.ASSERTION_ERROR.set(null); - AppHandle apexAppResult = launcher.launchApp(apexApp, conf, launchAttributes); - return new ApexRunnerResult(apexDAG.get(), apexAppResult) { - @Override - protected void cleanupOnCancelOrFinish() { - if (tempDir.get() != null) { - FileUtils.deleteQuietly(tempDir.get()); - } - } - }; - } catch (Exception e) { - Throwables.throwIfUnchecked(e); - throw new RuntimeException(e); - } - } else { - try { - ApexYarnLauncher yarnLauncher = new ApexYarnLauncher(); - AppHandle apexAppResult = yarnLauncher.launchApp(apexApp, configProperties); - return new ApexRunnerResult(apexDAG.get(), apexAppResult); - } catch (IOException e) { - throw new RuntimeException("Failed to launch the application on YARN.", e); - } - } - } - - //////////////////////////////////////////// - // Adapted from FlinkRunner for View support - - /** - * Creates a primitive {@link PCollectionView}. - * - *

For internal use only by runner implementors. - * - * @param The type of the elements of the input PCollection - * @param The type associated with the {@link PCollectionView} used as a side input - */ - public static class CreateApexPCollectionView - extends PTransform, PCollection> { - private static final long serialVersionUID = 1L; - private PCollectionView view; - - private CreateApexPCollectionView(PCollectionView view) { - this.view = view; - } - - public static CreateApexPCollectionView of( - PCollectionView view) { - return new CreateApexPCollectionView<>(view); - } - - @Override - public PCollection expand(PCollection input) { - return PCollection.createPrimitiveOutputInternal( - input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), input.getCoder()); - } - - public PCollectionView getView() { - return view; - } - } - - private static class WrapAsList extends DoFn> { - @ProcessElement - public void processElement(ProcessContext c) { - c.output(Collections.singletonList(c.element())); - } - } - - private static class StreamingWrapSingletonInList - extends PTransform, PCollection> { - private static final long serialVersionUID = 1L; - CreatePCollectionView transform; - - /** Builds an instance of this class from the overridden transform. 
*/ - private StreamingWrapSingletonInList(CreatePCollectionView transform) { - this.transform = transform; - } - - @Override - public PCollection expand(PCollection input) { - input - .apply(ParDo.of(new WrapAsList<>())) - .apply(CreateApexPCollectionView.of(transform.getView())); - return input; - } - - @Override - protected String getKindString() { - return "StreamingWrapSingletonInList"; - } - - static class Factory - extends SingleInputOutputOverrideFactory< - PCollection, PCollection, CreatePCollectionView> { - @Override - public PTransformReplacement, PCollection> getReplacementTransform( - AppliedPTransform, PCollection, CreatePCollectionView> - transform) { - return PTransformReplacement.of( - PTransformReplacements.getSingletonMainInput(transform), - new StreamingWrapSingletonInList<>(transform.getTransform())); - } - } - } - - private static class StreamingViewAsIterable - extends PTransform, PCollection> { - private static final long serialVersionUID = 1L; - private final PCollectionView> view; - - private StreamingViewAsIterable(PCollectionView> view) { - this.view = view; - } - - @Override - public PCollection expand(PCollection input) { - return ((PCollection) - input.apply(Combine.globally(new Concatenate()).withoutDefaults())) - .apply(CreateApexPCollectionView.of(view)); - } - - @Override - protected String getKindString() { - return "StreamingViewAsIterable"; - } - - static class Factory - extends SingleInputOutputOverrideFactory< - PCollection, PCollection, CreatePCollectionView>> { - @Override - public PTransformReplacement, PCollection> getReplacementTransform( - AppliedPTransform, PCollection, CreatePCollectionView>> - transform) { - return PTransformReplacement.of( - PTransformReplacements.getSingletonMainInput(transform), - new StreamingViewAsIterable<>(transform.getTransform().getView())); - } - } - } - - /** - * Combiner that combines {@code T}s into a single {@code List} containing all inputs. 
They - * require the input {@link PCollection} fits in memory. For a large {@link PCollection} this is - * expected to crash! - * - * @param the type of elements to concatenate. - */ - private static class Concatenate extends Combine.CombineFn, List> { - private static final long serialVersionUID = 1L; - - @Override - public List createAccumulator() { - return new ArrayList<>(); - } - - @Override - public List addInput(List accumulator, T input) { - accumulator.add(input); - return accumulator; - } - - @Override - public List mergeAccumulators(Iterable> accumulators) { - List result = createAccumulator(); - for (List accumulator : accumulators) { - result.addAll(accumulator); - } - return result; - } - - @Override - public List extractOutput(List accumulator) { - return accumulator; - } - - @Override - public Coder> getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) { - return ListCoder.of(inputCoder); - } - - @Override - public Coder> getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) { - return ListCoder.of(inputCoder); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerRegistrar.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerRegistrar.java deleted file mode 100644 index ad89723576ad..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerRegistrar.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import com.google.auto.service.AutoService; -import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsRegistrar; -import org.apache.beam.sdk.runners.PipelineRunnerRegistrar; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; - -/** - * Contains the {@link PipelineRunnerRegistrar} and {@link PipelineOptionsRegistrar} for the {@link - * ApexRunner}. - * - *

{@link AutoService} will register Apex's implementations of the {@link PipelineRunner} and - * {@link PipelineOptions} as available pipeline runner services. - */ -public final class ApexRunnerRegistrar { - private ApexRunnerRegistrar() {} - - /** Registers the {@link ApexRunner}. */ - @AutoService(PipelineRunnerRegistrar.class) - public static class Runner implements PipelineRunnerRegistrar { - @Override - public Iterable>> getPipelineRunners() { - return ImmutableList.of(ApexRunner.class, TestApexRunner.class); - } - } - - /** Registers the {@link ApexPipelineOptions}. */ - @AutoService(PipelineOptionsRegistrar.class) - public static class Options implements PipelineOptionsRegistrar { - @Override - public Iterable> getPipelineOptions() { - return ImmutableList.of(ApexPipelineOptions.class); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerResult.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerResult.java deleted file mode 100644 index e62a65965401..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexRunnerResult.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex; - -import com.datatorrent.api.DAG; -import java.io.IOException; -import javax.annotation.Nullable; -import org.apache.apex.api.Launcher.AppHandle; -import org.apache.apex.api.Launcher.ShutdownMode; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.metrics.MetricResults; -import org.joda.time.Duration; - -/** Result of executing a {@link Pipeline} with Apex in embedded mode. */ -public class ApexRunnerResult implements PipelineResult { - private final DAG apexDAG; - private final AppHandle apexApp; - private State state = State.UNKNOWN; - - public ApexRunnerResult(DAG dag, AppHandle apexApp) { - this.apexDAG = dag; - this.apexApp = apexApp; - } - - @Override - public State getState() { - return state; - } - - @Override - public State cancel() throws IOException { - apexApp.shutdown(ShutdownMode.KILL); - cleanupOnCancelOrFinish(); - state = State.CANCELLED; - return state; - } - - @Override - @Nullable - public State waitUntilFinish(@Nullable Duration duration) { - long timeout = - (duration == null || duration.getMillis() < 1) - ? Long.MAX_VALUE - : System.currentTimeMillis() + duration.getMillis(); - try { - while (!apexApp.isFinished() && System.currentTimeMillis() < timeout) { - if (ApexRunner.ASSERTION_ERROR.get() != null) { - throw ApexRunner.ASSERTION_ERROR.get(); - } - Thread.sleep(500); - } - if (apexApp.isFinished()) { - cleanupOnCancelOrFinish(); - return State.DONE; - } - return null; - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public State waitUntilFinish() { - return waitUntilFinish(null); - } - - @Override - public MetricResults metrics() { - throw new UnsupportedOperationException(); - } - - /** - * Return the DAG executed by the pipeline. - * - * @return DAG from translation. 
- */ - public DAG getApexDAG() { - return apexDAG; - } - - /** Opportunity for a subclass to perform cleanup, such as removing temporary files. */ - protected void cleanupOnCancelOrFinish() {} -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java deleted file mode 100644 index 15e3968914dc..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/ApexYarnLauncher.java +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; - -import com.datatorrent.api.Attribute; -import com.datatorrent.api.Attribute.AttributeMap; -import com.datatorrent.api.DAG; -import com.datatorrent.api.StreamingApplication; -import java.io.BufferedReader; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.Serializable; -import java.lang.reflect.AccessibleObject; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.net.URI; -import java.net.URL; -import java.net.URLClassLoader; -import java.nio.charset.StandardCharsets; -import java.nio.file.FileSystem; -import java.nio.file.FileSystems; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.BasicFileAttributes; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.jar.JarFile; -import java.util.jar.Manifest; -import org.apache.apex.api.EmbeddedAppLauncher; -import org.apache.apex.api.Launcher; -import org.apache.apex.api.Launcher.AppHandle; -import org.apache.apex.api.Launcher.LaunchMode; -import org.apache.apex.api.Launcher.LauncherException; -import org.apache.apex.api.Launcher.ShutdownMode; -import org.apache.apex.api.YarnAppLauncher; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter; -import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Proxy to launch the YARN application through the hadoop script to run in the pre-configured - * environment (class path, configuration, native libraries etc.). - * - *

The proxy takes the DAG and communicates with the Hadoop services to launch it on the cluster. - */ -public class ApexYarnLauncher { - private static final Logger LOG = LoggerFactory.getLogger(ApexYarnLauncher.class); - - public AppHandle launchApp(StreamingApplication app, Properties configProperties) - throws IOException { - - List jarsToShip = getYarnDeployDependencies(); - StringBuilder classpath = new StringBuilder(); - for (File path : jarsToShip) { - if (path.isDirectory()) { - File tmpJar = File.createTempFile("beam-runners-apex-", ".jar"); - createJar(path, tmpJar); - tmpJar.deleteOnExit(); - path = tmpJar; - } - if (classpath.length() != 0) { - classpath.append(':'); - } - classpath.append(path.getAbsolutePath()); - } - - EmbeddedAppLauncher embeddedLauncher = Launcher.getLauncher(LaunchMode.EMBEDDED); - DAG dag = embeddedLauncher.getDAG(); - app.populateDAG(dag, new Configuration(false)); - - Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap(); - launchAttributes.put(YarnAppLauncher.LIB_JARS, classpath.toString().replace(':', ',')); - LaunchParams lp = new LaunchParams(dag, launchAttributes, configProperties); - lp.cmd = "hadoop " + ApexYarnLauncher.class.getName(); - HashMap env = new HashMap<>(); - env.put("HADOOP_USER_CLASSPATH_FIRST", "1"); - env.put("HADOOP_CLASSPATH", classpath.toString()); - lp.env = env; - return launchApp(lp); - } - - protected AppHandle launchApp(LaunchParams params) throws IOException { - File tmpFile = File.createTempFile("beam-runner-apex", "params"); - tmpFile.deleteOnExit(); - try (FileOutputStream fos = new FileOutputStream(tmpFile)) { - SerializationUtils.serialize(params, fos); - } - if (params.getCmd() == null) { - ApexYarnLauncher.main(new String[] {tmpFile.getAbsolutePath()}); - } else { - String cmd = params.getCmd() + " " + tmpFile.getAbsolutePath(); - ByteArrayOutputStream consoleOutput = new ByteArrayOutputStream(); - LOG.info("Executing: {} with {}", cmd, params.getEnv()); 
- - ProcessBuilder pb = new ProcessBuilder("bash", "-c", cmd); - Map env = pb.environment(); - env.putAll(params.getEnv()); - Process p = pb.start(); - ProcessWatcher pw = new ProcessWatcher(p); - InputStream output = p.getInputStream(); - InputStream error = p.getErrorStream(); - while (!pw.isFinished()) { - IOUtils.copy(output, consoleOutput); - IOUtils.copy(error, consoleOutput); - } - if (pw.rc != 0) { - String msg = - "The Beam Apex runner in non-embedded mode requires the Hadoop client" - + " to be installed on the machine from which you launch the job" - + " and the 'hadoop' script in $PATH"; - LOG.error(msg); - throw new RuntimeException( - "Failed to run: " - + cmd - + " (exit code " - + pw.rc - + ")" - + "\n" - + consoleOutput.toString()); - } - } - return new AppHandle() { - @Override - public boolean isFinished() { - // TODO (future PR): interaction with child process - LOG.warn("YARN application runs asynchronously and status check not implemented."); - return true; - } - - @Override - public void shutdown(ShutdownMode arg0) throws LauncherException { - // TODO (future PR): interaction with child process - throw new UnsupportedOperationException(); - } - }; - } - - /** - * From the current classpath, find the jar files that need to be deployed with the application to - * run on YARN. Hadoop dependencies are provided through the Hadoop installation and the - * application should not bundle them to avoid conflicts. This is done by removing the Hadoop - * compile dependencies (transitively) by parsing the Maven dependency tree. 
- * - * @return list of jar files to ship - * @throws IOException when dependency information cannot be read - */ - public static List getYarnDeployDependencies() throws IOException { - try (InputStream dependencyTree = ApexRunner.class.getResourceAsStream("dependency-tree")) { - try (BufferedReader br = - new BufferedReader(new InputStreamReader(dependencyTree, StandardCharsets.UTF_8))) { - String line; - List excludes = new ArrayList<>(); - int excludeLevel = Integer.MAX_VALUE; - while ((line = br.readLine()) != null) { - for (int i = 0; i < line.length(); i++) { - char c = line.charAt(i); - if (Character.isLetter(c)) { - if (i > excludeLevel) { - excludes.add(line.substring(i)); - } else { - if (line.substring(i).startsWith("org.apache.hadoop")) { - excludeLevel = i; - excludes.add(line.substring(i)); - } else { - excludeLevel = Integer.MAX_VALUE; - } - } - break; - } - } - } - - Set excludeJarFileNames = Sets.newHashSet(); - for (String exclude : excludes) { - List strings = Splitter.on(':').splitToList(exclude); - String[] mvnc = strings.toArray(new String[strings.size()]); - String fileName = mvnc[1] + "-"; - if (mvnc.length == 6) { - fileName += mvnc[4] + "-" + mvnc[3]; // with classifier - } else { - fileName += mvnc[3]; - } - fileName += ".jar"; - excludeJarFileNames.add(fileName); - } - - ClassLoader classLoader = ApexYarnLauncher.class.getClassLoader(); - URL[] urls = ((URLClassLoader) classLoader).getURLs(); - List dependencyJars = new ArrayList<>(); - for (URL url : urls) { - File f = new File(url.getFile()); - // dependencies can also be directories in the build reactor, - // the Apex client will automatically create jar files for those. - if (f.exists() && !excludeJarFileNames.contains(f.getName())) { - dependencyJars.add(f); - } - } - return dependencyJars; - } - } - } - - /** - * Create a jar file from the given directory. 
- * - * @param dir source directory - * @param jarFile jar file name - * @throws IOException when file cannot be created - */ - public static void createJar(File dir, File jarFile) throws IOException { - - final Map env = Collections.singletonMap("create", "true"); - if (jarFile.exists() && !jarFile.delete()) { - throw new RuntimeException("Failed to remove " + jarFile); - } - URI uri = URI.create("jar:" + jarFile.toURI()); - try (final FileSystem zipfs = FileSystems.newFileSystem(uri, env)) { - - File manifestFile = new File(dir, JarFile.MANIFEST_NAME); - Files.createDirectory(zipfs.getPath("META-INF")); - try (final OutputStream out = Files.newOutputStream(zipfs.getPath(JarFile.MANIFEST_NAME))) { - if (!manifestFile.exists()) { - new Manifest().write(out); - } else { - Files.copy(manifestFile.toPath(), out); - } - } - - final Path root = dir.toPath(); - Files.walkFileTree( - root, - new java.nio.file.SimpleFileVisitor() { - String relativePath; - - @Override - public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) - throws IOException { - relativePath = root.relativize(dir).toString(); - if (!relativePath.isEmpty()) { - if (!relativePath.endsWith("/")) { - relativePath += "/"; - } - if (!"META-INF/".equals(relativePath)) { - final Path dstDir = zipfs.getPath(relativePath); - Files.createDirectory(dstDir); - } - } - return super.preVisitDirectory(dir, attrs); - } - - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) - throws IOException { - String name = relativePath + file.getFileName(); - if (!JarFile.MANIFEST_NAME.equals(name)) { - try (final OutputStream out = Files.newOutputStream(zipfs.getPath(name))) { - Files.copy(file, out); - } - } - return super.visitFile(file, attrs); - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) - throws IOException { - relativePath = root.relativize(dir.getParent()).toString(); - if (!relativePath.isEmpty() && 
!relativePath.endsWith("/")) { - relativePath += "/"; - } - return super.postVisitDirectory(dir, exc); - } - }); - } - } - - /** Transfer the properties to the configuration object. */ - public static void addProperties(Configuration conf, Properties props) { - for (final String propertyName : props.stringPropertyNames()) { - String propertyValue = props.getProperty(propertyName); - conf.set(propertyName, propertyValue); - } - } - - /** - * The main method expects the serialized DAG and will launch the YARN application. - * - * @param args location of launch parameters - * @throws IOException when parameters cannot be read - */ - public static void main(String[] args) throws IOException { - checkArgument(args.length == 1, "exactly one argument expected"); - File file = new File(args[0]); - checkArgument(file.exists() && file.isFile(), "invalid file path %s", file); - final LaunchParams params = SerializationUtils.deserialize(new FileInputStream(file)); - StreamingApplication apexApp = (dag, conf) -> copyShallow(params.dag, dag); - Configuration conf = new Configuration(); // configuration from Hadoop client - addProperties(conf, params.configProperties); - AppHandle appHandle = - params.getApexLauncher().launchApp(apexApp, conf, params.launchAttributes); - if (appHandle == null) { - throw new AssertionError("Launch returns null handle."); - } - // TODO (future PR) - // At this point the application is running, but this process should remain active to - // allow the parent to implement the runner result. - } - - /** Launch parameters that will be serialized and passed to the child process. 
*/ - @VisibleForTesting - protected static class LaunchParams implements Serializable { - private static final long serialVersionUID = 1L; - private final DAG dag; - private final Attribute.AttributeMap launchAttributes; - private final Properties configProperties; - private HashMap env; - private String cmd; - - protected LaunchParams(DAG dag, AttributeMap launchAttributes, Properties configProperties) { - this.dag = dag; - this.launchAttributes = launchAttributes; - this.configProperties = configProperties; - } - - protected Launcher getApexLauncher() { - return Launcher.getLauncher(LaunchMode.YARN); - } - - protected String getCmd() { - return cmd; - } - - protected Map getEnv() { - return env; - } - } - - private static void copyShallow(DAG from, DAG to) { - checkArgument( - from.getClass() == to.getClass(), - "must be same class %s %s", - from.getClass(), - to.getClass()); - Field[] fields = from.getClass().getDeclaredFields(); - AccessibleObject.setAccessible(fields, true); - for (Field field : fields) { - if (!Modifier.isStatic(field.getModifiers())) { - try { - field.set(to, field.get(from)); - } catch (IllegalArgumentException | IllegalAccessException e) { - throw new RuntimeException(e); - } - } - } - } - - /** Starts a command and waits for it to complete. 
*/ - public static class ProcessWatcher implements Runnable { - private final Process p; - private volatile boolean finished = false; - private volatile int rc; - - public ProcessWatcher(Process p) { - this.p = p; - new Thread(this).start(); - } - - public boolean isFinished() { - return finished; - } - - @Override - public void run() { - try { - rc = p.waitFor(); - } catch (Exception e) { - // ignore - } - finished = true; - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/TestApexRunner.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/TestApexRunner.java deleted file mode 100644 index c53f48f0ff93..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/TestApexRunner.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import com.datatorrent.api.DAG; -import java.io.IOException; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsValidator; -import org.joda.time.Duration; - -/** Apex {@link PipelineRunner} for testing. 
*/ -public class TestApexRunner extends PipelineRunner { - - private static final int RUN_WAIT_MILLIS = 20000; - private final ApexRunner delegate; - - private TestApexRunner(ApexPipelineOptions options) { - options.setEmbeddedExecution(true); - // options.setEmbeddedExecutionDebugMode(false); - this.delegate = ApexRunner.fromOptions(options); - } - - public static TestApexRunner fromOptions(PipelineOptions options) { - ApexPipelineOptions apexOptions = - PipelineOptionsValidator.validate(ApexPipelineOptions.class, options); - return new TestApexRunner(apexOptions); - } - - public static DAG translate(Pipeline pipeline, ApexPipelineOptions options) { - ApexRunner delegate = new ApexRunner(options); - delegate.translateOnly = true; - return delegate.run(pipeline).getApexDAG(); - } - - @Override - @SuppressWarnings("Finally") - public ApexRunnerResult run(Pipeline pipeline) { - ApexRunnerResult result = delegate.run(pipeline); - try { - // this is necessary for tests that just call run() and not waitUntilFinish - result.waitUntilFinish(Duration.millis(RUN_WAIT_MILLIS)); - return result; - } finally { - try { - result.cancel(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/package-info.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/package-info.java deleted file mode 100644 index cbbea17abc94..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Implementation of the Beam runner for Apache Apex. */ -package org.apache.beam.runners.apex; diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ApexPipelineTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ApexPipelineTranslator.java deleted file mode 100644 index 4cc33f4b838f..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ApexPipelineTranslator.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import com.datatorrent.api.DAG; -import java.util.HashMap; -import java.util.Map; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner.CreateApexPCollectionView; -import org.apache.beam.runners.apex.translation.operators.ApexProcessFnOperator; -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.GBKIntoKeyedWorkItems; -import org.apache.beam.runners.core.construction.PrimitiveCreate; -import org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.Read; -import org.apache.beam.sdk.runners.TransformHierarchy; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.View.CreatePCollectionView; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PValue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link ApexPipelineTranslator} translates {@link Pipeline} objects into Apex logical plan {@link - * DAG}. - */ -@SuppressWarnings({"rawtypes", "unchecked"}) -public class ApexPipelineTranslator extends Pipeline.PipelineVisitor.Defaults { - private static final Logger LOG = LoggerFactory.getLogger(ApexPipelineTranslator.class); - - /** - * A map from {@link PTransform} subclass to the corresponding {@link TransformTranslator} to use - * to translate that transform. 
- */ - private static final Map, TransformTranslator> transformTranslators = - new HashMap<>(); - - private final TranslationContext translationContext; - - static { - // register TransformTranslators - registerTransformTranslator(ParDo.MultiOutput.class, new ParDoTranslator<>()); - registerTransformTranslator( - SplittableParDoViaKeyedWorkItems.ProcessElements.class, - new ParDoTranslator.SplittableProcessElementsTranslator()); - registerTransformTranslator(GBKIntoKeyedWorkItems.class, new GBKIntoKeyedWorkItemsTranslator()); - registerTransformTranslator(Read.Unbounded.class, new ReadUnboundedTranslator()); - registerTransformTranslator(Read.Bounded.class, new ReadBoundedTranslator()); - registerTransformTranslator(GroupByKey.class, new GroupByKeyTranslator()); - registerTransformTranslator(Flatten.PCollections.class, new FlattenPCollectionTranslator()); - registerTransformTranslator(PrimitiveCreate.class, new CreateValuesTranslator()); - registerTransformTranslator( - CreateApexPCollectionView.class, new CreateApexPCollectionViewTranslator()); - registerTransformTranslator(CreatePCollectionView.class, new CreatePCollectionViewTranslator()); - registerTransformTranslator(Window.Assign.class, new WindowAssignTranslator()); - } - - public ApexPipelineTranslator(ApexPipelineOptions options) { - this.translationContext = new TranslationContext(options); - } - - public void translate(Pipeline pipeline, DAG dag) { - pipeline.traverseTopologically(this); - translationContext.populateDAG(dag); - } - - @Override - public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) { - LOG.debug("entering composite transform {}", node.getTransform()); - return CompositeBehavior.ENTER_TRANSFORM; - } - - @Override - public void leaveCompositeTransform(TransformHierarchy.Node node) { - LOG.debug("leaving composite transform {}", node.getTransform()); - } - - @Override - public void visitPrimitiveTransform(TransformHierarchy.Node node) { - LOG.debug("visiting 
transform {}", node.getTransform()); - PTransform transform = node.getTransform(); - TransformTranslator translator = getTransformTranslator(transform.getClass()); - if (null == translator) { - throw new UnsupportedOperationException("no translator registered for " + transform); - } - translationContext.setCurrentTransform(node.toAppliedPTransform(getPipeline())); - translator.translate(transform, translationContext); - } - - @Override - public void visitValue(PValue value, TransformHierarchy.Node producer) { - LOG.debug("visiting value {}", value); - } - - /** - * Records that instances of the specified PTransform class should be translated by default by the - * corresponding {@link TransformTranslator}. - */ - private static void registerTransformTranslator( - Class transformClass, - TransformTranslator transformTranslator) { - if (transformTranslators.put(transformClass, transformTranslator) != null) { - throw new IllegalArgumentException("defining multiple translators for " + transformClass); - } - } - - /** - * Returns the {@link TransformTranslator} to use for instances of the specified PTransform class, - * or null if none registered. 
- */ - private > - TransformTranslator getTransformTranslator(Class transformClass) { - return transformTranslators.get(transformClass); - } - - private static class ReadBoundedTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate(Read.Bounded transform, TranslationContext context) { - // TODO: adapter is visibleForTesting - BoundedToUnboundedSourceAdapter unboundedSource = - new BoundedToUnboundedSourceAdapter<>(transform.getSource()); - ApexReadUnboundedInputOperator operator = - new ApexReadUnboundedInputOperator<>(unboundedSource, true, context.getPipelineOptions()); - context.addOperator(operator, operator.output); - } - } - - private static class CreateApexPCollectionViewTranslator - implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate( - CreateApexPCollectionView transform, TranslationContext context) { - context.addView(transform.getView()); - LOG.debug("view {}", transform.getView().getName()); - } - } - - private static class CreatePCollectionViewTranslator - implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate( - CreatePCollectionView transform, TranslationContext context) { - context.addView(transform.getView()); - LOG.debug("view {}", transform.getView().getName()); - } - } - - private static class GBKIntoKeyedWorkItemsTranslator - implements TransformTranslator> { - - @Override - public void translate(GBKIntoKeyedWorkItems transform, TranslationContext context) { - // https://issues.apache.org/jira/browse/BEAM-1850 - ApexProcessFnOperator> operator = - ApexProcessFnOperator.toKeyedWorkItems(context.getPipelineOptions()); - context.addOperator(operator, operator.outputPort); - context.addStream(context.getInput(), operator.inputPort); - } - } -} diff --git 
a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/CreateValuesTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/CreateValuesTranslator.java deleted file mode 100644 index 025f3b17ad43..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/CreateValuesTranslator.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.runners.apex.translation.utils.ValuesSource; -import org.apache.beam.runners.core.construction.PrimitiveCreate; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.values.PCollection; - -/** Wraps elements from Create.Values into an {@link UnboundedSource}. 
mainly used for testing */ -class CreateValuesTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1451000241832745629L; - - @Override - public void translate(PrimitiveCreate transform, TranslationContext context) { - UnboundedSource unboundedSource = - new ValuesSource<>( - transform.getElements(), ((PCollection) context.getOutput()).getCoder()); - ApexReadUnboundedInputOperator operator = - new ApexReadUnboundedInputOperator<>(unboundedSource, context.getPipelineOptions()); - context.addOperator(operator, operator.output); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslator.java deleted file mode 100644 index ff22e9d696aa..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslator.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.beam.runners.apex.translation.operators.ApexFlattenOperator; -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.runners.apex.translation.utils.ValuesSource; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; - -/** {@link Flatten.PCollections} translation to Apex operator. */ -class FlattenPCollectionTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate(Flatten.PCollections transform, TranslationContext context) { - List> inputCollections = extractPCollections(context.getInputs()); - - if (inputCollections.isEmpty()) { - // create a dummy source that never emits anything - @SuppressWarnings("unchecked") - UnboundedSource unboundedSource = - new ValuesSource<>(Collections.EMPTY_LIST, VoidCoder.of()); - ApexReadUnboundedInputOperator operator = - new ApexReadUnboundedInputOperator<>(unboundedSource, context.getPipelineOptions()); - context.addOperator(operator, operator.output); - } else if (inputCollections.size() == 1) { - context.addAlias(context.getOutput(), inputCollections.get(0)); - } else { - @SuppressWarnings("unchecked") - PCollection output = (PCollection) context.getOutput(); - Map, Integer> unionTags = Collections.emptyMap(); - flattenCollections(inputCollections, unionTags, output, context); - } - } - - private List> extractPCollections(Map, PValue> 
inputs) { - List> collections = Lists.newArrayList(); - for (PValue pv : inputs.values()) { - checkArgument( - pv instanceof PCollection, - "Non-PCollection provided as input to flatten: %s of type %s", - pv, - pv.getClass().getSimpleName()); - collections.add((PCollection) pv); - } - return collections; - } - - /** - * Flatten the given collections into the given result collection. Translates into a cascading - * merge with 2 input ports per operator. The optional union tags can be used to identify the - * source in the result stream, used to channel multiple side inputs to a single Apex operator - * port. - * - * @param collections - * @param unionTags - * @param finalCollection - * @param context - */ - static void flattenCollections( - List> collections, - Map, Integer> unionTags, - PCollection finalCollection, - TranslationContext context) { - List> remainingCollections = Lists.newArrayList(); - PCollection firstCollection = null; - while (!collections.isEmpty()) { - for (PCollection collection : collections) { - if (null == firstCollection) { - firstCollection = collection; - } else { - ApexFlattenOperator operator = new ApexFlattenOperator<>(); - context.addStream(firstCollection, operator.data1); - Integer unionTag = unionTags.get(firstCollection); - operator.data1Tag = (unionTag != null) ? unionTag : 0; - context.addStream(collection, operator.data2); - unionTag = unionTags.get(collection); - operator.data2Tag = (unionTag != null) ? 
unionTag : 0; - - if (!collection.getCoder().equals(firstCollection.getCoder())) { - throw new UnsupportedOperationException("coders don't match"); - } - - if (collections.size() > 2) { - PCollection intermediateCollection = - PCollection.createPrimitiveOutputInternal( - collection.getPipeline(), - collection.getWindowingStrategy(), - collection.isBounded(), - collection.getCoder()); - context.addOperator(operator, operator.out, intermediateCollection); - remainingCollections.add(intermediateCollection); - } else { - // final stream merge - context.addOperator(operator, operator.out, finalCollection); - } - firstCollection = null; - } - } - if (firstCollection != null) { - // push to next merge level - remainingCollections.add(firstCollection); - firstCollection = null; - } - if (remainingCollections.size() > 1) { - collections = remainingCollections; - remainingCollections = Lists.newArrayList(); - } else { - collections = Lists.newArrayList(); - } - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslator.java deleted file mode 100644 index 2dd96f0503a5..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslator.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -/** {@link GroupByKey} translation to Apex operator. */ -class GroupByKeyTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate(GroupByKey transform, TranslationContext context) { - PCollection> input = context.getInput(); - ApexGroupByKeyOperator group = - new ApexGroupByKeyOperator<>( - context.getPipelineOptions(), input, context.getStateBackend()); - context.addOperator(group, group.output); - context.addStream(input, group.input); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ParDoTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ParDoTranslator.java deleted file mode 100644 index 7144a0eb3454..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ParDoTranslator.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; - -import com.datatorrent.api.Operator; -import com.datatorrent.api.Operator.OutputPort; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.stream.Collectors; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.translation.operators.ApexParDoOperator; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessElements; -import org.apache.beam.runners.core.construction.ParDoTranslation; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link ParDo.MultiOutput} is translated to {@link ApexParDoOperator} that wraps the {@link DoFn}. 
- */ -class ParDoTranslator - implements TransformTranslator> { - private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(ParDoTranslator.class); - - @Override - public void translate(ParDo.MultiOutput transform, TranslationContext context) { - DoFn doFn = transform.getFn(); - - if (DoFnSignatures.isSplittable(doFn)) { - throw new UnsupportedOperationException( - String.format( - "%s does not support splittable DoFn: %s", ApexRunner.class.getSimpleName(), doFn)); - } - if (DoFnSignatures.requiresTimeSortedInput(doFn)) { - throw new UnsupportedOperationException( - String.format( - "%s doesn't currently support @RequiresTimeSortedInput", - ApexRunner.class.getSimpleName())); - } - if (DoFnSignatures.usesTimers(doFn)) { - throw new UnsupportedOperationException( - String.format( - "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.", - DoFn.TimerId.class.getSimpleName(), - doFn.getClass().getName(), - DoFn.class.getSimpleName(), - ApexRunner.class.getSimpleName())); - } - - Map, PValue> outputs = context.getOutputs(); - PCollection input = context.getInput(); - Iterable> sideInputs = transform.getSideInputs().values(); - - DoFnSchemaInformation doFnSchemaInformation; - doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); - - Map> sideInputMapping = - ParDoTranslation.getSideInputMapping(context.getCurrentTransform()); - - Map, Coder> outputCoders = - outputs.entrySet().stream() - .filter(e -> e.getValue() instanceof PCollection) - .collect( - Collectors.toMap(e -> e.getKey(), e -> ((PCollection) e.getValue()).getCoder())); - ApexParDoOperator operator = - new ApexParDoOperator<>( - context.getPipelineOptions(), - doFn, - transform.getMainOutputTag(), - transform.getAdditionalOutputTags().getAll(), - input.getWindowingStrategy(), - sideInputs, - input.getCoder(), - outputCoders, - doFnSchemaInformation, - sideInputMapping, - 
context.getStateBackend()); - - Map, OutputPort> ports = Maps.newHashMapWithExpectedSize(outputs.size()); - for (Entry, PValue> output : outputs.entrySet()) { - checkArgument( - output.getValue() instanceof PCollection, - "%s %s outputs non-PCollection %s of type %s", - ParDo.MultiOutput.class.getSimpleName(), - context.getFullName(), - output.getValue(), - output.getValue().getClass().getSimpleName()); - PCollection pc = (PCollection) output.getValue(); - if (output.getKey().equals(transform.getMainOutputTag())) { - ports.put(pc, operator.output); - } else { - int portIndex = 0; - for (TupleTag tag : transform.getAdditionalOutputTags().getAll()) { - if (tag.equals(output.getKey())) { - ports.put(pc, operator.additionalOutputPorts[portIndex]); - break; - } - portIndex++; - } - } - } - context.addOperator(operator, ports); - context.addStream(context.getInput(), operator.input); - if (!Iterables.isEmpty(sideInputs)) { - addSideInputs(operator.sideInput1, sideInputs, context); - } - } - - static class SplittableProcessElementsTranslator< - InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> - implements TransformTranslator< - ProcessElements> { - - @Override - public void translate( - ProcessElements - transform, - TranslationContext context) { - - Map, PValue> outputs = context.getOutputs(); - PCollection input = context.getInput(); - Iterable> sideInputs = transform.getSideInputs(); - - Map, Coder> outputCoders = - outputs.entrySet().stream() - .filter(e -> e.getValue() instanceof PCollection) - .collect( - Collectors.toMap(e -> e.getKey(), e -> ((PCollection) e.getValue()).getCoder())); - - @SuppressWarnings({"rawtypes", "unchecked"}) - DoFn doFn = (DoFn) transform.newProcessFn(transform.getFn()); - ApexParDoOperator operator = - new ApexParDoOperator<>( - context.getPipelineOptions(), - doFn, - transform.getMainOutputTag(), - transform.getAdditionalOutputTags().getAll(), - input.getWindowingStrategy(), - sideInputs, - input.getCoder(), - 
outputCoders, - DoFnSchemaInformation.create(), - Collections.emptyMap(), - context.getStateBackend()); - - Map, OutputPort> ports = Maps.newHashMapWithExpectedSize(outputs.size()); - for (Entry, PValue> output : outputs.entrySet()) { - checkArgument( - output.getValue() instanceof PCollection, - "%s %s outputs non-PCollection %s of type %s", - ParDo.MultiOutput.class.getSimpleName(), - context.getFullName(), - output.getValue(), - output.getValue().getClass().getSimpleName()); - PCollection pc = (PCollection) output.getValue(); - if (output.getKey().equals(transform.getMainOutputTag())) { - ports.put(pc, operator.output); - } else { - int portIndex = 0; - for (TupleTag tag : transform.getAdditionalOutputTags().getAll()) { - if (tag.equals(output.getKey())) { - ports.put(pc, operator.additionalOutputPorts[portIndex]); - break; - } - portIndex++; - } - } - } - - context.addOperator(operator, ports); - context.addStream(context.getInput(), operator.input); - if (!Iterables.isEmpty(sideInputs)) { - addSideInputs(operator.sideInput1, sideInputs, context); - } - } - } - - static void addSideInputs( - Operator.InputPort sideInputPort, - Iterable> sideInputs, - TranslationContext context) { - Operator.InputPort[] sideInputPorts = {sideInputPort}; - if (Iterables.size(sideInputs) > sideInputPorts.length) { - PCollection unionCollection = unionSideInputs(sideInputs, context); - context.addStream(unionCollection, sideInputPorts[0]); - } else { - // the number of ports for side inputs is fixed and each port can only take one input. 
- for (int i = 0; i < Iterables.size(sideInputs); i++) { - context.addStream(context.getViewInput(Iterables.get(sideInputs, i)), sideInputPorts[i]); - } - } - } - - private static PCollection unionSideInputs( - Iterable> sideInputs, TranslationContext context) { - checkArgument(Iterables.size(sideInputs) > 1, "requires multiple side inputs"); - // flatten and assign union tag - List> sourceCollections = new ArrayList<>(); - Map, Integer> unionTags = new HashMap<>(); - PCollection firstSideInput = context.getViewInput(Iterables.get(sideInputs, 0)); - for (int i = 0; i < Iterables.size(sideInputs); i++) { - PCollectionView sideInput = Iterables.get(sideInputs, i); - PCollection sideInputCollection = context.getViewInput(sideInput); - if (!sideInputCollection - .getWindowingStrategy() - .equals(firstSideInput.getWindowingStrategy())) { - // TODO: check how to handle this in stream codec - // String msg = "Multiple side inputs with different window strategies."; - // throw new UnsupportedOperationException(msg); - LOG.warn( - "Side inputs union with different windowing strategies {} {}", - firstSideInput.getWindowingStrategy(), - sideInputCollection.getWindowingStrategy()); - } - if (!sideInputCollection.getCoder().equals(firstSideInput.getCoder())) { - String msg = context.getFullName() + ": Multiple side inputs with different coders."; - throw new UnsupportedOperationException(msg); - } - sourceCollections.add(context.getViewInput(sideInput)); - unionTags.put(sideInputCollection, i); - } - - PCollection resultCollection = - PCollection.createPrimitiveOutputInternal( - firstSideInput.getPipeline(), - firstSideInput.getWindowingStrategy(), - firstSideInput.isBounded(), - firstSideInput.getCoder()); - FlattenPCollectionTranslator.flattenCollections( - sourceCollections, unionTags, resultCollection, context); - return resultCollection; - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ReadUnboundedTranslator.java 
b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ReadUnboundedTranslator.java deleted file mode 100644 index ed035fa17209..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/ReadUnboundedTranslator.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import com.datatorrent.api.InputOperator; -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.sdk.io.Read; -import org.apache.beam.sdk.io.UnboundedSource; - -/** - * {@link Read.Unbounded} is translated to Apex {@link InputOperator} that wraps {@link - * UnboundedSource}. 
- */ -class ReadUnboundedTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate(Read.Unbounded transform, TranslationContext context) { - UnboundedSource unboundedSource = transform.getSource(); - ApexReadUnboundedInputOperator operator = - new ApexReadUnboundedInputOperator<>(unboundedSource, context.getPipelineOptions()); - context.addOperator(operator, operator.output); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TransformTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TransformTranslator.java deleted file mode 100644 index 4702f44c1a73..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TransformTranslator.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import java.io.Serializable; -import org.apache.beam.sdk.transforms.PTransform; - -/** Translates {@link PTransform} to Apex functions. 
*/ -interface TransformTranslator> extends Serializable { - void translate(T transform, TranslationContext context); -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java deleted file mode 100644 index 4a19f14f6e2d..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/TranslationContext.java +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; - -import com.datatorrent.api.Context.PortContext; -import com.datatorrent.api.DAG; -import com.datatorrent.api.Operator; -import com.datatorrent.api.Operator.InputPort; -import com.datatorrent.api.Operator.OutputPort; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.tuple.Pair; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals.ApexStateBackend; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.apex.translation.utils.CoderAdapterStreamCodec; -import org.apache.beam.runners.core.construction.TransformInputs; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.runners.AppliedPTransform; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.PInput; -import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; - -/** Maintains context data for {@link TransformTranslator}s. 
*/ -@SuppressWarnings({"rawtypes", "unchecked", "TypeParameterUnusedInFormals"}) -class TranslationContext { - - private final ApexPipelineOptions pipelineOptions; - private AppliedPTransform currentTransform; - private final Map>> streams = - new HashMap<>(); - private final Map operators = new HashMap<>(); - private final Map, PInput> viewInputs = new HashMap<>(); - private Map aliasCollections = new HashMap<>(); - - public void addView(PCollectionView view) { - this.viewInputs.put(view, this.getInput()); - } - - public InputT getViewInput(PCollectionView view) { - PInput input = this.viewInputs.get(view); - checkArgument(input != null, "unknown view " + view.getName()); - return (InputT) input; - } - - TranslationContext(ApexPipelineOptions pipelineOptions) { - this.pipelineOptions = pipelineOptions; - } - - public void setCurrentTransform(AppliedPTransform transform) { - this.currentTransform = transform; - } - - public ApexPipelineOptions getPipelineOptions() { - return pipelineOptions; - } - - public String getFullName() { - return getCurrentTransform().getFullName(); - } - - public Map, PValue> getInputs() { - return getCurrentTransform().getInputs(); - } - - public InputT getInput() { - return (InputT) - Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(getCurrentTransform())); - } - - public Map, PValue> getOutputs() { - return getCurrentTransform().getOutputs(); - } - - public OutputT getOutput() { - return (OutputT) Iterables.getOnlyElement(getCurrentTransform().getOutputs().values()); - } - - public AppliedPTransform getCurrentTransform() { - checkArgument(currentTransform != null, "current transform not set"); - return currentTransform; - } - - public void addOperator(Operator operator, OutputPort port) { - addOperator(operator, port, getOutput()); - } - - /** - * Register operator and output ports for the given collections. 
- * - * @param operator - * @param ports - */ - public void addOperator(Operator operator, Map, OutputPort> ports) { - boolean first = true; - for (Map.Entry, OutputPort> portEntry : ports.entrySet()) { - if (first) { - addOperator(operator, portEntry.getValue(), portEntry.getKey()); - first = false; - } else { - this.streams.put( - portEntry.getKey(), - (Pair) - new ImmutablePair<>( - new OutputPortInfo(portEntry.getValue(), getCurrentTransform()), - new ArrayList<>())); - } - } - } - - /** - * Add the operator with its output port for the given result {link PCollection}. - * - * @param operator - * @param port - * @param output - */ - public void addOperator(Operator operator, OutputPort port, PCollection output) { - // Apex DAG requires a unique operator name - // use the transform's name and make it unique - String name = getCurrentTransform().getFullName(); - for (int i = 1; this.operators.containsKey(name); i++) { - name = getCurrentTransform().getFullName() + i; - } - this.operators.put(name, operator); - this.streams.put( - output, - (Pair) - new ImmutablePair<>( - new OutputPortInfo(port, getCurrentTransform()), new ArrayList<>())); - } - - public void addStream(PInput input, InputPort inputPort) { - while (aliasCollections.containsKey(input)) { - input = aliasCollections.get(input); - } - - Pair> stream = this.streams.get(input); - checkArgument(stream != null, "no upstream operator defined for " + input); - stream.getRight().add(new InputPortInfo(inputPort, getCurrentTransform())); - } - - /** - * Set the given output as alias for another input, i.e. there won't be a stream representation in - * the target DAG. 
- * - * @param alias - * @param source - */ - public void addAlias(PValue alias, PInput source) { - aliasCollections.put(alias, source); - } - - public void populateDAG(DAG dag) { - for (Map.Entry nameAndOperator : this.operators.entrySet()) { - dag.addOperator(nameAndOperator.getKey(), nameAndOperator.getValue()); - } - - int streamIndex = 0; - for (Map.Entry>> streamEntry : - this.streams.entrySet()) { - List destInfo = streamEntry.getValue().getRight(); - InputPort[] sinks = new InputPort[destInfo.size()]; - for (int i = 0; i < sinks.length; i++) { - sinks[i] = destInfo.get(i).port; - } - - if (sinks.length > 0) { - DAG.StreamMeta streamMeta = - dag.addStream("stream" + streamIndex++, streamEntry.getValue().getLeft().port, sinks); - if (pipelineOptions.isParDoFusionEnabled()) { - optimizeStreams(streamMeta, streamEntry); - } - - for (InputPort port : sinks) { - PCollection pc = streamEntry.getKey(); - Coder coder = pc.getCoder(); - if (pc.getWindowingStrategy() != null) { - coder = - FullWindowedValueCoder.of( - pc.getCoder(), pc.getWindowingStrategy().getWindowFn().windowCoder()); - } - Coder wrapperCoder = ApexStreamTuple.ApexStreamTupleCoder.of(coder); - CoderAdapterStreamCodec streamCodec = new CoderAdapterStreamCodec(wrapperCoder); - dag.setInputPortAttribute(port, PortContext.STREAM_CODEC, streamCodec); - } - } - } - } - - private void optimizeStreams( - DAG.StreamMeta streamMeta, - Map.Entry>> streamEntry) { - DAG.Locality loc = null; - - List sinks = streamEntry.getValue().getRight(); - OutputPortInfo source = streamEntry.getValue().getLeft(); - PTransform sourceTransform = source.transform.getTransform(); - if (sourceTransform instanceof ParDo.MultiOutput || sourceTransform instanceof Window.Assign) { - // source qualifies for chaining, check sink(s) - for (InputPortInfo sink : sinks) { - PTransform transform = sink.transform.getTransform(); - if (transform instanceof ParDo.MultiOutput) { - ParDo.MultiOutput t = (ParDo.MultiOutput) transform; - if 
(t.getSideInputs().size() > 0) { - loc = DAG.Locality.CONTAINER_LOCAL; - break; - } else { - loc = DAG.Locality.THREAD_LOCAL; - } - } else if (transform instanceof Window.Assign) { - loc = DAG.Locality.THREAD_LOCAL; - } else { - // cannot chain, if there is any other sink - loc = null; - break; - } - } - } - - streamMeta.setLocality(loc); - } - - /** - * Return the state backend for the pipeline translation. - * - * @return - */ - public ApexStateBackend getStateBackend() { - return new ApexStateInternals.ApexStateBackend(); - } - - static class InputPortInfo { - InputPort port; - AppliedPTransform transform; - - public InputPortInfo(InputPort port, AppliedPTransform transform) { - this.port = port; - this.transform = transform; - } - } - - static class OutputPortInfo { - OutputPort port; - AppliedPTransform transform; - - public OutputPortInfo(OutputPort port, AppliedPTransform transform) { - this.port = port; - this.transform = transform; - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/WindowAssignTranslator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/WindowAssignTranslator.java deleted file mode 100644 index c357651000b1..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/WindowAssignTranslator.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import org.apache.beam.runners.apex.translation.operators.ApexProcessFnOperator; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.transforms.windowing.WindowFn; -import org.apache.beam.sdk.values.PCollection; - -/** {@link Window} is translated to {@link ApexProcessFnOperator#assignWindows}. */ -class WindowAssignTranslator implements TransformTranslator> { - private static final long serialVersionUID = 1L; - - @Override - public void translate(Window.Assign transform, TranslationContext context) { - PCollection output = context.getOutput(); - PCollection input = context.getInput(); - - if (transform.getWindowFn() == null) { - // no work to do - context.addAlias(output, input); - } else { - @SuppressWarnings("unchecked") - WindowFn windowFn = (WindowFn) transform.getWindowFn(); - ApexProcessFnOperator operator = - ApexProcessFnOperator.assignWindows(windowFn, context.getPipelineOptions()); - context.addOperator(operator, operator.outputPort); - context.addStream(context.getInput(), operator.inputPort); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexFlattenOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexFlattenOperator.java deleted file mode 100644 index 268bb194592a..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexFlattenOperator.java 
+++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.operators; - -import com.datatorrent.api.DefaultInputPort; -import com.datatorrent.api.DefaultOutputPort; -import com.datatorrent.api.annotation.OutputPortFieldAnnotation; -import com.datatorrent.common.util.BaseOperator; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple.WatermarkTuple; -import org.apache.beam.sdk.transforms.Flatten.PCollections; -import org.apache.beam.sdk.util.WindowedValue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Apex operator for Beam {@link PCollections}. */ -public class ApexFlattenOperator extends BaseOperator { - - private static final Logger LOG = LoggerFactory.getLogger(ApexFlattenOperator.class); - private boolean traceTuples = false; - - private long inputWM1; - private long inputWM2; - private long outputWM; - - public int data1Tag; - public int data2Tag; - - /** Data input port 1. 
*/ - public final transient DefaultInputPort>> data1 = - new DefaultInputPort>>() { - /** Emits to port "out" */ - @Override - public void process(ApexStreamTuple> tuple) { - if (tuple instanceof WatermarkTuple) { - WatermarkTuple wmTuple = (WatermarkTuple) tuple; - if (wmTuple.getTimestamp() > inputWM1) { - inputWM1 = wmTuple.getTimestamp(); - if (inputWM1 <= inputWM2) { - // move output watermark and emit it - outputWM = inputWM1; - if (traceTuples) { - LOG.debug("\nemitting watermark {}\n", outputWM); - } - out.emit(tuple); - } - } - return; - } - if (traceTuples) { - LOG.debug("\nemitting {}\n", tuple); - } - - if (data1Tag > 0 && tuple instanceof ApexStreamTuple.DataTuple) { - ((ApexStreamTuple.DataTuple) tuple).setUnionTag(data1Tag); - } - out.emit(tuple); - } - }; - - /** Data input port 2. */ - public final transient DefaultInputPort>> data2 = - new DefaultInputPort>>() { - /** Emits to port "out" */ - @Override - public void process(ApexStreamTuple> tuple) { - if (tuple instanceof WatermarkTuple) { - WatermarkTuple wmTuple = (WatermarkTuple) tuple; - if (wmTuple.getTimestamp() > inputWM2) { - inputWM2 = wmTuple.getTimestamp(); - if (inputWM2 <= inputWM1) { - // move output watermark and emit it - outputWM = inputWM2; - if (traceTuples) { - LOG.debug("\nemitting watermark {}\n", outputWM); - } - out.emit(tuple); - } - } - return; - } - if (traceTuples) { - LOG.debug("\nemitting {}\n", tuple); - } - - if (data2Tag > 0 && tuple instanceof ApexStreamTuple.DataTuple) { - ((ApexStreamTuple.DataTuple) tuple).setUnionTag(data2Tag); - } - out.emit(tuple); - } - }; - - /** Output port. 
*/ - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort>> out = - new DefaultOutputPort<>(); -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java deleted file mode 100644 index 6f90f5808b32..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexGroupByKeyOperator.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.operators; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.datatorrent.api.Context.OperatorContext; -import com.datatorrent.api.DefaultInputPort; -import com.datatorrent.api.DefaultOutputPort; -import com.datatorrent.api.Operator; -import com.datatorrent.api.StreamCodec; -import com.datatorrent.api.annotation.OutputPortFieldAnnotation; -import com.esotericsoftware.kryo.serializers.FieldSerializer.Bind; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.util.Collection; -import java.util.Collections; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals.ApexStateBackend; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.core.NullSideInputReader; -import org.apache.beam.runners.core.OutputWindowedValue; -import org.apache.beam.runners.core.ReduceFnRunner; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.SystemReduceFn; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.runners.core.construction.TriggerTranslation; -import org.apache.beam.runners.core.triggers.ExecutableTriggerStateMachine; -import org.apache.beam.runners.core.triggers.TriggerStateMachines; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.GroupByKey; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.values.KV; -import 
org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Apex operator for Beam {@link GroupByKey}. This operator expects the input stream already - * partitioned by K, which is determined by the {@link StreamCodec} on the input port. - * - * @param key type - * @param value type - */ -public class ApexGroupByKeyOperator - implements Operator, ApexTimerInternals.TimerProcessor { - private static final Logger LOG = LoggerFactory.getLogger(ApexGroupByKeyOperator.class); - private boolean traceTuples = true; - - @Bind(JavaSerializer.class) - private WindowingStrategy windowingStrategy; - - @Bind(JavaSerializer.class) - private Coder keyCoder; - - @Bind(JavaSerializer.class) - private Coder valueCoder; - - @Bind(JavaSerializer.class) - private final SerializablePipelineOptions serializedOptions; - - @Bind(JavaSerializer.class) - private final StateInternalsFactory stateInternalsFactory; - - private final ApexTimerInternals timerInternals; - private Instant inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - - public final transient DefaultInputPort>>> input = - new DefaultInputPort>>>() { - @Override - public void process(ApexStreamTuple>> t) { - try { - if (t instanceof ApexStreamTuple.WatermarkTuple) { - ApexStreamTuple.WatermarkTuple mark = (ApexStreamTuple.WatermarkTuple) t; - processWatermark(mark); - if (traceTuples) { - LOG.debug("\nemitting watermark {}\n", mark.getTimestamp()); - } - output.emit(ApexStreamTuple.WatermarkTuple.of(mark.getTimestamp())); - return; - } - if (traceTuples) { - LOG.debug("\ninput {}\n", t.getValue()); - } - processElement(t.getValue()); - } catch (Exception e) { - Throwables.throwIfUnchecked(e); - throw new RuntimeException(e); - } - } - }; - - 
@OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort>>>> - output = new DefaultOutputPort<>(); - - @SuppressWarnings("unchecked") - public ApexGroupByKeyOperator( - ApexPipelineOptions pipelineOptions, - PCollection> input, - ApexStateBackend stateBackend) { - checkNotNull(pipelineOptions); - this.serializedOptions = new SerializablePipelineOptions(pipelineOptions); - this.windowingStrategy = (WindowingStrategy) input.getWindowingStrategy(); - this.keyCoder = ((KvCoder) input.getCoder()).getKeyCoder(); - this.valueCoder = ((KvCoder) input.getCoder()).getValueCoder(); - this.stateInternalsFactory = stateBackend.newStateInternalsFactory(keyCoder); - TimerInternals.TimerDataCoderV2 timerCoder = - TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder()); - this.timerInternals = new ApexTimerInternals<>(timerCoder); - } - - @SuppressWarnings("unused") // for Kryo - private ApexGroupByKeyOperator() { - this.serializedOptions = null; - this.stateInternalsFactory = null; - this.timerInternals = null; - } - - @Override - public void beginWindow(long l) {} - - @Override - public void endWindow() { - timerInternals.fireReadyTimers( - timerInternals.currentProcessingTime().getMillis(), this, TimeDomain.PROCESSING_TIME); - } - - @Override - public void setup(OperatorContext context) { - this.traceTuples = - ApexStreamTuple.Logging.isDebugEnabled( - serializedOptions.get().as(ApexPipelineOptions.class), this); - } - - @Override - public void teardown() {} - - private ReduceFnRunner, BoundedWindow> newReduceFnRunner(K key) { - return new ReduceFnRunner<>( - key, - windowingStrategy, - ExecutableTriggerStateMachine.create( - TriggerStateMachines.stateMachineForTrigger( - TriggerTranslation.toProto(windowingStrategy.getTrigger()))), - stateInternalsFactory.stateInternalsForKey(key), - timerInternals, - new OutputWindowedValue>>() { - @Override - public void outputWindowedValue( - KV> output, - Instant timestamp, - 
Collection windows, - PaneInfo pane) { - if (traceTuples) { - LOG.debug("\nemitting {} timestamp {}\n", output, timestamp); - } - ApexGroupByKeyOperator.this.output.emit( - ApexStreamTuple.DataTuple.of(WindowedValue.of(output, timestamp, windows, pane))); - } - - @Override - public void outputWindowedValue( - TupleTag tag, - AdditionalOutputT output, - Instant timestamp, - Collection windows, - PaneInfo pane) { - throw new UnsupportedOperationException( - "GroupAlsoByWindow should not use side outputs"); - } - }, - NullSideInputReader.empty(), - SystemReduceFn.buffering(this.valueCoder), - serializedOptions.get()); - } - - private void processElement(WindowedValue> windowedValue) throws Exception { - final KV kv = windowedValue.getValue(); - final WindowedValue updatedWindowedValue = - WindowedValue.of( - kv.getValue(), - windowedValue.getTimestamp(), - windowedValue.getWindows(), - windowedValue.getPane()); - timerInternals.setContext(kv.getKey(), this.keyCoder, this.inputWatermark, null); - ReduceFnRunner, BoundedWindow> reduceFnRunner = - newReduceFnRunner(kv.getKey()); - reduceFnRunner.processElements(Collections.singletonList(updatedWindowedValue)); - reduceFnRunner.persist(); - } - - @Override - public void fireTimer(K key, Collection timerData) { - timerInternals.setContext(key, keyCoder, inputWatermark, null); - ReduceFnRunner, BoundedWindow> reduceFnRunner = newReduceFnRunner(key); - try { - reduceFnRunner.onTimers(timerData); - } catch (Exception e) { - Throwables.throwIfUnchecked(e); - throw new RuntimeException(e); - } - reduceFnRunner.persist(); - } - - private void processWatermark(ApexStreamTuple.WatermarkTuple mark) { - this.inputWatermark = new Instant(mark.getTimestamp()); - timerInternals.fireReadyTimers(this.inputWatermark.getMillis(), this, TimeDomain.EVENT_TIME); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java 
b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java deleted file mode 100644 index 7e55b67f6f74..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java +++ /dev/null @@ -1,601 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.operators; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; - -import com.datatorrent.api.Context.OperatorContext; -import com.datatorrent.api.DefaultInputPort; -import com.datatorrent.api.DefaultOutputPort; -import com.datatorrent.api.annotation.InputPortFieldAnnotation; -import com.datatorrent.api.annotation.OutputPortFieldAnnotation; -import com.datatorrent.common.util.BaseOperator; -import com.esotericsoftware.kryo.serializers.FieldSerializer.Bind; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Executors; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals.ApexStateBackend; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.apex.translation.utils.NoOpStepContext; -import org.apache.beam.runners.apex.translation.utils.StateInternalsProxy; -import org.apache.beam.runners.apex.translation.utils.ValueAndCoderKryoSerializable; -import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.DoFnRunners; -import org.apache.beam.runners.core.DoFnRunners.OutputManager; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.KeyedWorkItemCoder; -import org.apache.beam.runners.core.NullSideInputReader; -import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker; -import org.apache.beam.runners.core.OutputWindowedValue; -import org.apache.beam.runners.core.PushbackSideInputDoFnRunner; -import 
org.apache.beam.runners.core.SideInputHandler; -import org.apache.beam.runners.core.SideInputReader; -import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner; -import org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; -import org.apache.beam.runners.core.StatefulDoFnRunner; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.runners.core.TimerInternalsFactory; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.sdk.coders.ByteArrayCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.reflect.DoFnInvoker; -import org.apache.beam.sdk.transforms.reflect.DoFnInvokers; -import org.apache.beam.sdk.transforms.reflect.DoFnSignature; -import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.UserCodeException; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder; -import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.WindowingStrategy; -import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Apex operator for Beam {@link DoFn}. */ -public class ApexParDoOperator extends BaseOperator - implements OutputManager, ApexTimerInternals.TimerProcessor { - private static final Logger LOG = LoggerFactory.getLogger(ApexParDoOperator.class); - private boolean traceTuples = true; - - @Bind(JavaSerializer.class) - private final SerializablePipelineOptions pipelineOptions; - - @Bind(JavaSerializer.class) - private final DoFn doFn; - - @Bind(JavaSerializer.class) - private final TupleTag mainOutputTag; - - @Bind(JavaSerializer.class) - private final List> additionalOutputTags; - - @Bind(JavaSerializer.class) - private final WindowingStrategy windowingStrategy; - - @Bind(JavaSerializer.class) - private final Iterable> sideInputs; - - @Bind(JavaSerializer.class) - private final Coder> windowedInputCoder; - - @Bind(JavaSerializer.class) - private final Coder inputCoder; - - @Bind(JavaSerializer.class) - private final Map, Coder> outputCoders; - - @Bind(JavaSerializer.class) - private final DoFnSchemaInformation doFnSchemaInformation; - - @Bind(JavaSerializer.class) - private final Map> sideInputMapping; - - private StateInternalsProxy currentKeyStateInternals; - private final ApexTimerInternals currentKeyTimerInternals; - - private final StateInternals sideInputStateInternals; - private final ValueAndCoderKryoSerializable>> pushedBack; - - private LongMin pushedBackWatermark = new LongMin(); - private long currentInputWatermark = Long.MIN_VALUE; - private long currentOutputWatermark = currentInputWatermark; - - private transient PushbackSideInputDoFnRunner pushbackDoFnRunner; - private transient SideInputHandler 
sideInputHandler; - private transient Map, DefaultOutputPort>> - additionalOutputPortMapping = Maps.newHashMapWithExpectedSize(5); - private transient DoFnInvoker doFnInvoker; - - /** Constructor. */ - public ApexParDoOperator( - ApexPipelineOptions pipelineOptions, - DoFn doFn, - TupleTag mainOutputTag, - List> additionalOutputTags, - WindowingStrategy windowingStrategy, - Iterable> sideInputs, - Coder inputCoder, - Map, Coder> outputCoders, - DoFnSchemaInformation doFnSchemaInformation, - Map> sideInputMapping, - ApexStateBackend stateBackend) { - this.pipelineOptions = new SerializablePipelineOptions(pipelineOptions); - this.doFn = doFn; - this.mainOutputTag = mainOutputTag; - this.additionalOutputTags = additionalOutputTags; - this.windowingStrategy = windowingStrategy; - this.sideInputs = sideInputs; - this.sideInputStateInternals = - new StateInternalsProxy<>(stateBackend.newStateInternalsFactory(VoidCoder.of())); - - if (additionalOutputTags.size() > additionalOutputPorts.length) { - String msg = - String.format( - "Too many additional outputs (currently only supporting %s).", - additionalOutputPorts.length); - throw new UnsupportedOperationException(msg); - } - - WindowedValueCoder wvCoder = - FullWindowedValueCoder.of(inputCoder, this.windowingStrategy.getWindowFn().windowCoder()); - Coder>> listCoder = ListCoder.of(wvCoder); - this.pushedBack = new ValueAndCoderKryoSerializable<>(new ArrayList<>(), listCoder); - this.windowedInputCoder = wvCoder; - this.inputCoder = inputCoder; - this.outputCoders = outputCoders; - - TimerInternals.TimerDataCoderV2 timerCoder = - TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder()); - this.currentKeyTimerInternals = new ApexTimerInternals<>(timerCoder); - this.doFnSchemaInformation = doFnSchemaInformation; - this.sideInputMapping = sideInputMapping; - - if (doFn instanceof ProcessFn) { - // we know that it is keyed on byte[] - Coder keyCoder = ByteArrayCoder.of(); - 
this.currentKeyStateInternals = - new StateInternalsProxy<>(stateBackend.newStateInternalsFactory(keyCoder)); - } else { - DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass()); - if (signature.usesState()) { - checkArgument(inputCoder instanceof KvCoder, "keyed input required for stateful DoFn"); - @SuppressWarnings("rawtypes") - Coder keyCoder = ((KvCoder) inputCoder).getKeyCoder(); - this.currentKeyStateInternals = - new StateInternalsProxy<>(stateBackend.newStateInternalsFactory(keyCoder)); - } - } - } - - @SuppressWarnings("unused") // for Kryo - private ApexParDoOperator() { - this.pipelineOptions = null; - this.doFn = null; - this.mainOutputTag = null; - this.additionalOutputTags = null; - this.windowingStrategy = null; - this.sideInputs = null; - this.pushedBack = null; - this.sideInputStateInternals = null; - this.windowedInputCoder = null; - this.inputCoder = null; - this.outputCoders = Collections.emptyMap(); - this.currentKeyTimerInternals = null; - this.doFnSchemaInformation = null; - this.sideInputMapping = null; - } - - public final transient DefaultInputPort>> input = - new DefaultInputPort>>() { - @Override - public void process(ApexStreamTuple> t) { - if (t instanceof ApexStreamTuple.WatermarkTuple) { - processWatermark((ApexStreamTuple.WatermarkTuple) t); - } else { - if (traceTuples) { - LOG.debug("\ninput {}\n", t.getValue()); - } - Iterable> justPushedBack = - processElementInReadyWindows(t.getValue()); - for (WindowedValue pushedBackValue : justPushedBack) { - pushedBackWatermark.add(pushedBackValue.getTimestamp().getMillis()); - pushedBack.get().add(pushedBackValue); - } - } - } - }; - - @InputPortFieldAnnotation(optional = true) - public final transient DefaultInputPort>>> sideInput1 = - new DefaultInputPort>>>() { - @Override - public void process(ApexStreamTuple>> t) { - if (t instanceof ApexStreamTuple.WatermarkTuple) { - // ignore side input watermarks - return; - } - - int sideInputIndex = 0; - if (t instanceof 
ApexStreamTuple.DataTuple) { - sideInputIndex = ((ApexStreamTuple.DataTuple) t).getUnionTag(); - } - - if (traceTuples) { - LOG.debug("\nsideInput {} {}\n", sideInputIndex, t.getValue()); - } - - PCollectionView sideInput = Iterables.get(sideInputs, sideInputIndex); - sideInputHandler.addSideInputValue(sideInput, t.getValue()); - - List> newPushedBack = new ArrayList<>(); - for (WindowedValue elem : pushedBack.get()) { - Iterable> justPushedBack = processElementInReadyWindows(elem); - Iterables.addAll(newPushedBack, justPushedBack); - } - - pushedBack.get().clear(); - pushedBackWatermark.clear(); - for (WindowedValue pushedBackValue : newPushedBack) { - pushedBackWatermark.add(pushedBackValue.getTimestamp().getMillis()); - pushedBack.get().add(pushedBackValue); - } - - // potentially emit watermark - processWatermark(ApexStreamTuple.WatermarkTuple.of(currentInputWatermark)); - } - }; - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> output = new DefaultOutputPort<>(); - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> additionalOutput1 = - new DefaultOutputPort<>(); - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> additionalOutput2 = - new DefaultOutputPort<>(); - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> additionalOutput3 = - new DefaultOutputPort<>(); - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> additionalOutput4 = - new DefaultOutputPort<>(); - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort> additionalOutput5 = - new DefaultOutputPort<>(); - - public final transient DefaultOutputPort[] additionalOutputPorts = { - additionalOutput1, additionalOutput2, additionalOutput3, additionalOutput4, additionalOutput5 - }; - - @Override - public void output(TupleTag tag, WindowedValue tuple) { - DefaultOutputPort> 
additionalOutputPort = - additionalOutputPortMapping.get(tag); - if (additionalOutputPort != null) { - additionalOutputPort.emit(ApexStreamTuple.DataTuple.of(tuple)); - } else { - output.emit(ApexStreamTuple.DataTuple.of(tuple)); - } - if (traceTuples) { - LOG.debug("\nemitting {}\n", tuple); - } - } - - private Iterable> processElementInReadyWindows(WindowedValue elem) { - try { - pushbackDoFnRunner.startBundle(); - if (currentKeyStateInternals != null) { - InputT value = elem.getValue(); - final Object key; - final Coder keyCoder; - @SuppressWarnings({"rawtypes", "unchecked"}) - WindowedValueCoder wvCoder = (WindowedValueCoder) windowedInputCoder; - if (value instanceof KeyedWorkItem) { - key = ((KeyedWorkItem) value).key(); - @SuppressWarnings({"rawtypes", "unchecked"}) - KeyedWorkItemCoder kwiCoder = (KeyedWorkItemCoder) wvCoder.getValueCoder(); - keyCoder = kwiCoder.getKeyCoder(); - } else { - key = ((KV) value).getKey(); - @SuppressWarnings({"rawtypes", "unchecked"}) - KvCoder kwiCoder = (KvCoder) wvCoder.getValueCoder(); - keyCoder = kwiCoder.getKeyCoder(); - } - ((StateInternalsProxy) currentKeyStateInternals).setKey(key); - currentKeyTimerInternals.setContext( - key, - keyCoder, - new Instant(this.currentInputWatermark), - new Instant(this.currentOutputWatermark)); - } - Iterable> pushedBack = - pushbackDoFnRunner.processElementInReadyWindows(elem); - pushbackDoFnRunner.finishBundle(); - return pushedBack; - } catch (UserCodeException ue) { - if (ue.getCause() instanceof AssertionError) { - ApexRunner.ASSERTION_ERROR.set((AssertionError) ue.getCause()); - } - throw ue; - } - } - - @Override - public void fireTimer(Object key, Collection timerDataSet) { - pushbackDoFnRunner.startBundle(); - @SuppressWarnings("unchecked") - Coder keyCoder = (Coder) currentKeyStateInternals.getKeyCoder(); - ((StateInternalsProxy) currentKeyStateInternals).setKey(key); - currentKeyTimerInternals.setContext( - key, - keyCoder, - new Instant(this.currentInputWatermark), - new 
Instant(this.currentOutputWatermark)); - for (TimerData timerData : timerDataSet) { - StateNamespace namespace = timerData.getNamespace(); - checkArgument(namespace instanceof WindowNamespace); - BoundedWindow window = ((WindowNamespace) namespace).getWindow(); - pushbackDoFnRunner.onTimer( - timerData.getTimerId(), - timerData.getTimerFamilyId(), - null, - window, - timerData.getTimestamp(), - timerData.getOutputTimestamp(), - timerData.getDomain()); - } - pushbackDoFnRunner.finishBundle(); - } - - private void processWatermark(ApexStreamTuple.WatermarkTuple mark) { - this.currentInputWatermark = mark.getTimestamp(); - long minEventTimeTimer = - currentKeyTimerInternals.fireReadyTimers( - this.currentInputWatermark, this, TimeDomain.EVENT_TIME); - - checkState( - minEventTimeTimer >= currentInputWatermark, - "Event time timer processing generates new timer(s) behind watermark."); - // LOG.info("Processing time timer {} registered behind watermark {}", minProcessingTimeTimer, - // currentInputWatermark); - - // TODO: is this the right way to trigger processing time timers? 
- // drain all timers below current watermark, including those that result from firing - long minProcessingTimeTimer = Long.MIN_VALUE; - while (minProcessingTimeTimer < currentInputWatermark) { - minProcessingTimeTimer = - currentKeyTimerInternals.fireReadyTimers( - this.currentInputWatermark, this, TimeDomain.PROCESSING_TIME); - if (minProcessingTimeTimer < currentInputWatermark) { - LOG.info( - "Processing time timer {} registered behind watermark {}", - minProcessingTimeTimer, - currentInputWatermark); - } - } - if (Iterables.isEmpty(sideInputs)) { - outputWatermark(mark); - return; - } - - long potentialOutputWatermark = Math.min(pushedBackWatermark.get(), currentInputWatermark); - if (potentialOutputWatermark > currentOutputWatermark) { - currentOutputWatermark = potentialOutputWatermark; - outputWatermark(ApexStreamTuple.WatermarkTuple.of(currentOutputWatermark)); - } - } - - private void outputWatermark(ApexStreamTuple.WatermarkTuple mark) { - if (traceTuples) { - LOG.debug("\nemitting {}\n", mark); - } - output.emit(mark); - if (!additionalOutputPortMapping.isEmpty()) { - for (DefaultOutputPort> additionalOutput : - additionalOutputPortMapping.values()) { - additionalOutput.emit(mark); - } - } - } - - @Override - public void setup(OperatorContext context) { - this.traceTuples = - ApexStreamTuple.Logging.isDebugEnabled( - pipelineOptions.get().as(ApexPipelineOptions.class), this); - SideInputReader sideInputReader = NullSideInputReader.of(sideInputs); - if (!Iterables.isEmpty(sideInputs)) { - sideInputHandler = - new SideInputHandler(Lists.newArrayList(sideInputs), sideInputStateInternals); - sideInputReader = sideInputHandler; - } - - for (int i = 0; i < additionalOutputTags.size(); i++) { - @SuppressWarnings("unchecked") - DefaultOutputPort> port = - (DefaultOutputPort>) additionalOutputPorts[i]; - additionalOutputPortMapping.put(additionalOutputTags.get(i), port); - } - - NoOpStepContext stepContext = - new NoOpStepContext() { - - @Override - public 
StateInternals stateInternals() { - return currentKeyStateInternals; - } - - @Override - public TimerInternals timerInternals() { - return currentKeyTimerInternals; - } - }; - DoFnRunner doFnRunner = - DoFnRunners.simpleRunner( - pipelineOptions.get(), - doFn, - sideInputReader, - this, - mainOutputTag, - additionalOutputTags, - stepContext, - inputCoder, - outputCoders, - windowingStrategy, - doFnSchemaInformation, - sideInputMapping); - - doFnInvoker = DoFnInvokers.invokerFor(doFn); - doFnInvoker.invokeSetup(); - - if (this.currentKeyStateInternals != null) { - - StatefulDoFnRunner.CleanupTimer cleanupTimer = - new StatefulDoFnRunner.TimeInternalsCleanupTimer( - stepContext.timerInternals(), windowingStrategy); - - @SuppressWarnings({"rawtypes"}) - Coder windowCoder = windowingStrategy.getWindowFn().windowCoder(); - - @SuppressWarnings({"unchecked"}) - StatefulDoFnRunner.StateCleaner stateCleaner = - new StatefulDoFnRunner.StateInternalsStateCleaner<>( - doFn, stepContext.stateInternals(), windowCoder); - - doFnRunner = - DoFnRunners.defaultStatefulDoFnRunner( - doFn, - inputCoder, - doFnRunner, - stepContext, - windowingStrategy, - cleanupTimer, - stateCleaner); - } - - pushbackDoFnRunner = - SimplePushbackSideInputDoFnRunner.create( - doFnRunner, Lists.newArrayList(sideInputs), sideInputHandler); - - if (doFn instanceof ProcessFn) { - - @SuppressWarnings("unchecked") - StateInternalsFactory stateInternalsFactory = - (StateInternalsFactory) this.currentKeyStateInternals.getFactory(); - - @SuppressWarnings({"rawtypes", "unchecked"}) - ProcessFn splittableDoFn = (ProcessFn) doFn; - splittableDoFn.setStateInternalsFactory(stateInternalsFactory); - TimerInternalsFactory timerInternalsFactory = key -> currentKeyTimerInternals; - splittableDoFn.setTimerInternalsFactory(timerInternalsFactory); - splittableDoFn.setProcessElementInvoker( - new OutputAndTimeBoundedSplittableProcessElementInvoker<>( - doFn, - pipelineOptions.get(), - new OutputWindowedValue() { - @Override 
- public void outputWindowedValue( - OutputT output, - Instant timestamp, - Collection windows, - PaneInfo pane) { - output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane)); - } - - @Override - public void outputWindowedValue( - TupleTag tag, - AdditionalOutputT output, - Instant timestamp, - Collection windows, - PaneInfo pane) { - output(tag, WindowedValue.of(output, timestamp, windows, pane)); - } - }, - sideInputReader, - Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), - 10000, - Duration.standardSeconds(10))); - } - } - - @Override - public void teardown() { - doFnInvoker.invokeTeardown(); - super.teardown(); - } - - @Override - public void beginWindow(long windowId) {} - - @Override - public void endWindow() { - currentKeyTimerInternals.fireReadyTimers( - currentKeyTimerInternals.currentProcessingTime().getMillis(), - this, - TimeDomain.PROCESSING_TIME); - } - - private static class LongMin { - long state = Long.MAX_VALUE; - - public void add(long l) { - state = Math.min(state, l); - } - - public long get() { - return state; - } - - public void clear() { - state = Long.MAX_VALUE; - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexProcessFnOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexProcessFnOperator.java deleted file mode 100644 index f3c407871dae..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexProcessFnOperator.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.operators; - -import com.datatorrent.api.DefaultInputPort; -import com.datatorrent.api.DefaultOutputPort; -import com.datatorrent.api.annotation.OutputPortFieldAnnotation; -import com.datatorrent.common.util.BaseOperator; -import com.esotericsoftware.kryo.serializers.FieldSerializer.Bind; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.Serializable; -import java.util.Collection; -import java.util.Collections; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.core.KeyedWorkItem; -import org.apache.beam.runners.core.KeyedWorkItems; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.WindowFn; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Apex operator for simple native map operations. 
*/ -public class ApexProcessFnOperator extends BaseOperator { - - private static final Logger LOG = LoggerFactory.getLogger(ApexProcessFnOperator.class); - private boolean traceTuples = false; - - @Bind(JavaSerializer.class) - private final ApexOperatorFn fn; - - public ApexProcessFnOperator(ApexOperatorFn fn, boolean traceTuples) { - super(); - this.traceTuples = traceTuples; - this.fn = fn; - } - - @SuppressWarnings("unused") - private ApexProcessFnOperator() { - // for Kryo - fn = null; - } - - private final transient OutputEmitter>> outputEmitter = - new OutputEmitter>>() { - @Override - public void emit(ApexStreamTuple> tuple) { - if (traceTuples) { - LOG.debug("\nemitting {}\n", tuple); - } - outputPort.emit(tuple); - } - }; - - /** Something that emits results. */ - public interface OutputEmitter { - void emit(T tuple); - } - - /** The processing logic for this operator. */ - public interface ApexOperatorFn extends Serializable { - void process( - ApexStreamTuple> input, - OutputEmitter>> outputEmitter) - throws Exception; - } - - /** Convert {@link KV} into {@link KeyedWorkItem}s. 
*/ - public static ApexProcessFnOperator> toKeyedWorkItems( - ApexPipelineOptions options) { - ApexOperatorFn> fn = new ToKeyedWorkItems<>(); - return new ApexProcessFnOperator<>(fn, options.isTupleTracingEnabled()); - } - - private static class ToKeyedWorkItems implements ApexOperatorFn> { - @Override - public final void process( - ApexStreamTuple>> tuple, - OutputEmitter>> outputEmitter) { - - if (tuple instanceof ApexStreamTuple.WatermarkTuple) { - outputEmitter.emit(tuple); - } else { - for (WindowedValue> in : tuple.getValue().explodeWindows()) { - KeyedWorkItem kwi = - KeyedWorkItems.elementsWorkItem( - in.getValue().getKey(), - Collections.singletonList(in.withValue(in.getValue().getValue()))); - outputEmitter.emit(ApexStreamTuple.DataTuple.of(in.withValue(kwi))); - } - } - } - } - - public static ApexProcessFnOperator assignWindows( - WindowFn windowFn, ApexPipelineOptions options) { - ApexOperatorFn fn = new AssignWindows<>(windowFn); - return new ApexProcessFnOperator<>(fn, options.isTupleTracingEnabled()); - } - - /** Function for implementing {@link org.apache.beam.sdk.transforms.windowing.Window.Assign}. 
*/ - private static class AssignWindows implements ApexOperatorFn { - private final WindowFn windowFn; - - private AssignWindows(WindowFn windowFn) { - this.windowFn = windowFn; - } - - @Override - public final void process( - ApexStreamTuple> tuple, - OutputEmitter>> outputEmitter) - throws Exception { - if (tuple instanceof ApexStreamTuple.WatermarkTuple) { - outputEmitter.emit(tuple); - } else { - final WindowedValue input = tuple.getValue(); - Collection windows = - windowFn.assignWindows( - windowFn.new AssignContext() { - @Override - public T element() { - return input.getValue(); - } - - @Override - public Instant timestamp() { - return input.getTimestamp(); - } - - @Override - public BoundedWindow window() { - return Iterables.getOnlyElement(input.getWindows()); - } - }); - for (W w : windows) { - WindowedValue wv = - WindowedValue.of(input.getValue(), input.getTimestamp(), w, input.getPane()); - outputEmitter.emit(ApexStreamTuple.DataTuple.of(wv)); - } - } - } - } - - /** Input port. */ - public final transient DefaultInputPort>> inputPort = - new DefaultInputPort>>() { - @Override - public void process(ApexStreamTuple> tuple) { - try { - fn.process(tuple, outputEmitter); - } catch (Exception e) { - Throwables.throwIfUnchecked(e); - throw new RuntimeException(e); - } - } - }; - - /** Output port. 
*/ - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort>> outputPort = - new DefaultOutputPort<>(); -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexReadUnboundedInputOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexReadUnboundedInputOperator.java deleted file mode 100644 index a9646b525419..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexReadUnboundedInputOperator.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.operators; - -import com.datatorrent.api.Context.OperatorContext; -import com.datatorrent.api.DefaultOutputPort; -import com.datatorrent.api.InputOperator; -import com.datatorrent.api.annotation.OutputPortFieldAnnotation; -import com.datatorrent.common.util.BaseOperator; -import com.esotericsoftware.kryo.serializers.FieldSerializer.Bind; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.IOException; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple.DataTuple; -import org.apache.beam.runners.apex.translation.utils.ValuesSource; -import org.apache.beam.runners.core.construction.SerializablePipelineOptions; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Apex input operator that wraps Beam {@link UnboundedSource}. 
*/ -public class ApexReadUnboundedInputOperator< - OutputT, CheckpointMarkT extends UnboundedSource.CheckpointMark> - implements InputOperator { - private static final Logger LOG = LoggerFactory.getLogger(ApexReadUnboundedInputOperator.class); - private boolean traceTuples = false; - private long outputWatermark = 0; - - @Bind(JavaSerializer.class) - private final SerializablePipelineOptions pipelineOptions; - - @Bind(JavaSerializer.class) - private final UnboundedSource source; - - private final boolean isBoundedSource; - private transient UnboundedSource.UnboundedReader reader; - private transient boolean available = false; - - @OutputPortFieldAnnotation(optional = true) - public final transient DefaultOutputPort>> output = - new DefaultOutputPort<>(); - - public ApexReadUnboundedInputOperator( - UnboundedSource source, ApexPipelineOptions options) { - this.pipelineOptions = new SerializablePipelineOptions(options); - this.source = source; - this.isBoundedSource = false; - } - - public ApexReadUnboundedInputOperator( - UnboundedSource source, - boolean isBoundedSource, - ApexPipelineOptions options) { - this.pipelineOptions = new SerializablePipelineOptions(options); - this.source = source; - this.isBoundedSource = isBoundedSource; - } - - @SuppressWarnings("unused") // for Kryo - private ApexReadUnboundedInputOperator() { - this.pipelineOptions = null; - this.source = null; - this.isBoundedSource = false; - } - - @Override - public void beginWindow(long windowId) { - if (!available && (isBoundedSource || source instanceof ValuesSource)) { - // if it's a Create and the input was consumed, emit final watermark - emitWatermarkIfNecessary(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - } else { - emitWatermarkIfNecessary(reader.getWatermark().getMillis()); - } - } - - private void emitWatermarkIfNecessary(long mark) { - if (mark > outputWatermark) { - outputWatermark = mark; - if (traceTuples) { - LOG.debug("\nemitting watermark {}\n", mark); - } - 
output.emit(ApexStreamTuple.WatermarkTuple.of(mark)); - } - } - - @Override - public void endWindow() { - if (outputWatermark >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) { - // terminate the stream - if (traceTuples) { - LOG.debug("terminating input after final watermark"); - } - try { - // see BEAM-1140 for why the delay after mark was emitted - Thread.sleep(100); - } catch (InterruptedException e) { - } - BaseOperator.shutdown(); - } - } - - @Override - public void setup(OperatorContext context) { - this.traceTuples = - ApexStreamTuple.Logging.isDebugEnabled( - pipelineOptions.get().as(ApexPipelineOptions.class), this); - try { - reader = source.createReader(this.pipelineOptions.get(), null); - available = reader.start(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void teardown() { - try { - if (reader != null) { - reader.close(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void emitTuples() { - try { - if (!available) { - available = reader.advance(); - } - if (available) { - OutputT data = reader.getCurrent(); - Instant timestamp = reader.getCurrentTimestamp(); - available = reader.advance(); - if (traceTuples) { - LOG.debug("\nemitting '{}' timestamp {}\n", data, timestamp); - } - output.emit( - DataTuple.of( - WindowedValue.of(data, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING))); - } - } catch (Exception e) { - Throwables.propagateIfPossible(e); - throw new RuntimeException(e); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java deleted file mode 100644 index 886f15351db9..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternals.java +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.operators; - -import com.datatorrent.netlet.util.Slice; -import com.esotericsoftware.kryo.DefaultSerializer; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.Serializable; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.TimerInternals; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ComparisonChain; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashMultimap; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap; -import org.joda.time.Instant; - -/** - * An implementation of Beam's {@link TimerInternals}. - * - *

Assumes that the current key is set prior to accessing the state.
- * This implementation stores timer data in heap memory and is serialized during checkpointing, it - * will only work with a small number of timers. - */ -@DefaultSerializer(JavaSerializer.class) -class ApexTimerInternals implements TimerInternals, Serializable { - - private final TimerSet eventTimeTimeTimers; - private final TimerSet processingTimeTimers; - - private transient K currentKey; - private transient Instant currentInputWatermark; - private transient Instant currentOutputWatermark; - private transient Coder keyCoder; - - public ApexTimerInternals(TimerDataCoderV2 timerDataCoder) { - this.eventTimeTimeTimers = new TimerSet(timerDataCoder); - this.processingTimeTimers = new TimerSet(timerDataCoder); - } - - public void setContext( - K key, Coder keyCoder, Instant inputWatermark, Instant outputWatermark) { - this.currentKey = key; - this.keyCoder = keyCoder; - this.currentInputWatermark = inputWatermark; - this.currentOutputWatermark = outputWatermark; - } - - @VisibleForTesting - protected TimerSet getTimerSet(TimeDomain domain) { - return (domain == TimeDomain.EVENT_TIME) ? 
eventTimeTimeTimers : processingTimeTimers; - } - - @Override - public void setTimer( - StateNamespace namespace, - String timerId, - String timerFamilyId, - Instant target, - Instant outputTimestamp, - TimeDomain timeDomain) { - TimerData timerData = - TimerData.of(timerId, timerFamilyId, namespace, target, outputTimestamp, timeDomain); - setTimer(timerData); - } - - @Override - public void setTimer(TimerData timerData) { - getTimerSet(timerData.getDomain()).addTimer(getKeyBytes(this.currentKey), timerData); - } - - @Override - public void deleteTimer(StateNamespace namespace, String timerId, TimeDomain timeDomain) { - getTimerSet(timeDomain).deleteTimer(getKeyBytes(this.currentKey), namespace, timerId); - } - - @Override - public void deleteTimer(StateNamespace namespace, String timerId, String timerFamilyId) { - this.eventTimeTimeTimers.deleteTimer(getKeyBytes(this.currentKey), namespace, timerId); - this.processingTimeTimers.deleteTimer(getKeyBytes(this.currentKey), namespace, timerId); - } - - @Override - public void deleteTimer(TimerData timerKey) { - getTimerSet(timerKey.getDomain()).deleteTimer(getKeyBytes(this.currentKey), timerKey); - } - - @Override - public Instant currentProcessingTime() { - return Instant.now(); - } - - @Override - public Instant currentSynchronizedProcessingTime() { - throw new UnsupportedOperationException(); - } - - @Override - public Instant currentInputWatermarkTime() { - return currentInputWatermark; - } - - @Override - public Instant currentOutputWatermarkTime() { - return currentOutputWatermark; - } - - public interface TimerProcessor { - void fireTimer(K key, Collection timerData); - } - - /** - * Fire the timers that are ready. These are the timers that are registered to be triggered at a - * time before the current time. Timer processing may register new timers, which can cause the - * returned timestamp to be before the the current time. The caller may repeat the call until such - * backdated timers are cleared. 
- * - * @return minimum timestamp of registered timers. - */ - public long fireReadyTimers( - long currentTime, TimerProcessor timerProcessor, TimeDomain timeDomain) { - TimerSet timers = getTimerSet(timeDomain); - - // move minTimestamp first, - // timer additions that result from firing may modify it - timers.minTimestamp = currentTime; - - // we keep the timers to return in a different list and launch them later - // because we cannot prevent a trigger from registering another timer, - // which would lead to concurrent modification exception. - Multimap toFire = HashMultimap.create(); - - Iterator>> it = timers.activeTimers.entrySet().iterator(); - while (it.hasNext()) { - Map.Entry> keyWithTimers = it.next(); - - Iterator timerIt = keyWithTimers.getValue().iterator(); - while (timerIt.hasNext()) { - try { - TimerData timerData = - CoderUtils.decodeFromByteArray(timers.timerDataCoder, timerIt.next().buffer); - if (timerData.getTimestamp().isBefore(currentTime)) { - toFire.put(keyWithTimers.getKey(), timerData); - timerIt.remove(); - } - } catch (CoderException e) { - throw new RuntimeException(e); - } - } - - if (keyWithTimers.getValue().isEmpty()) { - it.remove(); - } - } - - // fire ready timers - if (!toFire.isEmpty()) { - for (Slice keyBytes : toFire.keySet()) { - try { - K key = CoderUtils.decodeFromByteArray(keyCoder, keyBytes.buffer); - timerProcessor.fireTimer(key, toFire.get(keyBytes)); - } catch (CoderException e) { - throw new RuntimeException(e); - } - } - } - - return timers.minTimestamp; - } - - private Slice getKeyBytes(K key) { - try { - return new Slice(CoderUtils.encodeToByteArray(keyCoder, key)); - } catch (CoderException e) { - throw new RuntimeException(e); - } - } - - protected static class TimerSet implements Serializable { - private final Map> activeTimers = new HashMap<>(); - private final TimerDataCoderV2 timerDataCoder; - private long minTimestamp = Long.MAX_VALUE; - - protected TimerSet(TimerDataCoderV2 timerDataCoder) { - 
this.timerDataCoder = timerDataCoder; - } - - public void addTimer(Slice keyBytes, TimerData timer) { - Set timersForKey = activeTimers.get(keyBytes); - if (timersForKey == null) { - timersForKey = new HashSet<>(); - } - - try { - Slice timerBytes = new Slice(CoderUtils.encodeToByteArray(timerDataCoder, timer)); - timersForKey.add(timerBytes); - } catch (CoderException e) { - throw new RuntimeException(e); - } - - activeTimers.put(keyBytes, timersForKey); - this.minTimestamp = Math.min(minTimestamp, timer.getTimestamp().getMillis()); - } - - public void deleteTimer(Slice keyBytes, StateNamespace namespace, String timerId) { - Set timersForKey = activeTimers.get(keyBytes); - if (timersForKey == null) { - return; - } - - Iterator timerIt = timersForKey.iterator(); - while (timerIt.hasNext()) { - try { - TimerData timerData = - CoderUtils.decodeFromByteArray(timerDataCoder, timerIt.next().buffer); - ComparisonChain chain = ComparisonChain.start().compare(timerData.getTimerId(), timerId); - if (chain.result() == 0 && !timerData.getNamespace().equals(namespace)) { - // Obtaining the stringKey may be expensive; only do so if required - chain = chain.compare(timerData.getNamespace().stringKey(), namespace.stringKey()); - } - if (chain.result() == 0) { - timerIt.remove(); - } - } catch (CoderException e) { - throw new RuntimeException(e); - } - } - - if (timersForKey.isEmpty()) { - activeTimers.remove(keyBytes); - } - } - - public void deleteTimer(Slice keyBytes, TimerData timerKey) { - Set timersForKey = activeTimers.get(keyBytes); - if (timersForKey != null) { - try { - Slice timerBytes = new Slice(CoderUtils.encodeToByteArray(timerDataCoder, timerKey)); - timersForKey.add(timerBytes); - timersForKey.remove(timerBytes); - } catch (CoderException e) { - throw new RuntimeException(e); - } - - if (timersForKey.isEmpty()) { - activeTimers.remove(keyBytes); - } else { - activeTimers.put(keyBytes, timersForKey); - } - } - } - - @VisibleForTesting - protected Map> getMap() { - 
return activeTimers; - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/package-info.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/package-info.java deleted file mode 100644 index 1ce24ceb9f0a..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Implementation of the Beam runner for Apache Apex. */ -package org.apache.beam.runners.apex.translation.operators; diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/package-info.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/package-info.java deleted file mode 100644 index 15918a26df69..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Implementation of the Beam runner for Apache Apex. */ -package org.apache.beam.runners.apex.translation; diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternals.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternals.java deleted file mode 100644 index 998007bbceea..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternals.java +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.utils; - -import com.datatorrent.netlet.util.Slice; -import com.esotericsoftware.kryo.DefaultSerializer; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateTag; -import org.apache.beam.runners.core.StateTag.StateBinder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.InstantCoder; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.state.BagState; -import org.apache.beam.sdk.state.CombiningState; -import org.apache.beam.sdk.state.MapState; -import org.apache.beam.sdk.state.ReadableState; -import org.apache.beam.sdk.state.SetState; -import org.apache.beam.sdk.state.State; -import org.apache.beam.sdk.state.StateContext; -import org.apache.beam.sdk.state.ValueState; -import org.apache.beam.sdk.state.WatermarkHoldState; -import org.apache.beam.sdk.transforms.Combine.CombineFn; -import org.apache.beam.sdk.transforms.CombineWithContext.CombineFnWithContext; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.sdk.util.CombineFnUtil; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashBasedTable; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Table; -import org.joda.time.Instant; - -/** - * Implementation of {@link StateInternals} for 
transient use. - * - *

For fields that need to be serialized, use {@link ApexStateInternalsFactory} or {@link - * StateInternalsProxy} - */ -public class ApexStateInternals implements StateInternals { - private final K key; - private final Table stateTable; - - protected ApexStateInternals(K key, Table stateTable) { - this.key = key; - this.stateTable = stateTable; - } - - @Override - public K getKey() { - return key; - } - - @Override - public T state( - StateNamespace namespace, StateTag address, final StateContext c) { - return address.bind(new ApexStateBinder(namespace, address, c)); - } - - /** A {@link StateBinder} that returns {@link State} wrappers for serialized state. */ - private class ApexStateBinder implements StateBinder { - private final StateNamespace namespace; - private final StateContext c; - - private ApexStateBinder(StateNamespace namespace, StateTag address, StateContext c) { - this.namespace = namespace; - this.c = c; - } - - @Override - public ValueState bindValue(StateTag> address, Coder coder) { - return new ApexValueState<>(namespace, address, coder); - } - - @Override - public BagState bindBag(final StateTag> address, Coder elemCoder) { - return new ApexBagState<>(namespace, address, elemCoder); - } - - @Override - public SetState bindSet(StateTag> address, Coder elemCoder) { - throw new UnsupportedOperationException( - String.format("%s is not supported", SetState.class.getSimpleName())); - } - - @Override - public MapState bindMap( - StateTag> spec, - Coder mapKeyCoder, - Coder mapValueCoder) { - throw new UnsupportedOperationException( - String.format("%s is not supported", MapState.class.getSimpleName())); - } - - @Override - public CombiningState bindCombiningValue( - StateTag> address, - Coder accumCoder, - final CombineFn combineFn) { - return new ApexCombiningState<>(namespace, address, accumCoder, combineFn); - } - - @Override - public WatermarkHoldState bindWatermark( - StateTag address, TimestampCombiner timestampCombiner) { - return new 
ApexWatermarkHoldState<>(namespace, address, timestampCombiner); - } - - @Override - public - CombiningState bindCombiningValueWithContext( - StateTag> address, - Coder accumCoder, - CombineFnWithContext combineFn) { - return bindCombiningValue(address, accumCoder, CombineFnUtil.bindContext(combineFn, c)); - } - } - - private class AbstractState { - protected final StateNamespace namespace; - protected final StateTag address; - protected final Coder coder; - - private AbstractState( - StateNamespace namespace, StateTag address, Coder coder) { - this.namespace = namespace; - this.address = address; - this.coder = coder; - } - - protected T readValue() { - T value = null; - byte[] buf = stateTable.get(namespace.stringKey(), address.getId()); - if (buf != null) { - // TODO: reuse input - Input input = new Input(buf); - try { - return coder.decode(input); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - return value; - } - - public void writeValue(T input) { - ByteArrayOutputStream output = new ByteArrayOutputStream(); - try { - coder.encode(input, output); - stateTable.put(namespace.stringKey(), address.getId(), output.toByteArray()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public void clear() { - stateTable.remove(namespace.stringKey(), address.getId()); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - @SuppressWarnings("unchecked") - AbstractState that = (AbstractState) o; - return namespace.equals(that.namespace) && address.equals(that.address); - } - - @Override - public int hashCode() { - int result = namespace.hashCode(); - result = 31 * result + address.hashCode(); - return result; - } - } - - private class ApexValueState extends AbstractState implements ValueState { - - private ApexValueState( - StateNamespace namespace, StateTag> address, Coder coder) { - super(namespace, address, coder); - } 
- - @Override - public ApexValueState readLater() { - return this; - } - - @Override - public T read() { - return readValue(); - } - - @Override - public void write(T input) { - writeValue(input); - } - } - - private final class ApexWatermarkHoldState extends AbstractState - implements WatermarkHoldState { - - private final TimestampCombiner timestampCombiner; - - public ApexWatermarkHoldState( - StateNamespace namespace, - StateTag address, - TimestampCombiner timestampCombiner) { - super(namespace, address, InstantCoder.of()); - this.timestampCombiner = timestampCombiner; - } - - @Override - public ApexWatermarkHoldState readLater() { - return this; - } - - @Override - public Instant read() { - return readValue(); - } - - @Override - public void add(Instant outputTime) { - Instant combined = read(); - combined = (combined == null) ? outputTime : timestampCombiner.combine(combined, outputTime); - writeValue(combined); - } - - @Override - public ReadableState isEmpty() { - return new ReadableState() { - @Override - public ReadableState readLater() { - return this; - } - - @Override - public Boolean read() { - return stateTable.get(namespace.stringKey(), address.getId()) == null; - } - }; - } - - @Override - public TimestampCombiner getTimestampCombiner() { - return timestampCombiner; - } - } - - private final class ApexCombiningState extends AbstractState - implements CombiningState { - private final CombineFn combineFn; - - private ApexCombiningState( - StateNamespace namespace, - StateTag> address, - Coder coder, - CombineFn combineFn) { - super(namespace, address, coder); - this.combineFn = combineFn; - } - - @Override - public ApexCombiningState readLater() { - return this; - } - - @Override - public OutputT read() { - return combineFn.extractOutput(getAccum()); - } - - @Override - public void add(InputT input) { - AccumT accum = combineFn.addInput(getAccum(), input); - writeValue(accum); - } - - @Override - public AccumT getAccum() { - AccumT accum = 
readValue(); - if (accum == null) { - accum = combineFn.createAccumulator(); - } - return accum; - } - - @Override - public ReadableState isEmpty() { - return new ReadableState() { - @Override - public ReadableState readLater() { - return this; - } - - @Override - public Boolean read() { - return stateTable.get(namespace.stringKey(), address.getId()) == null; - } - }; - } - - @Override - public void addAccum(AccumT accum) { - accum = combineFn.mergeAccumulators(Arrays.asList(getAccum(), accum)); - writeValue(accum); - } - - @Override - public AccumT mergeAccumulators(Iterable accumulators) { - return combineFn.mergeAccumulators(accumulators); - } - } - - private final class ApexBagState extends AbstractState> implements BagState { - private ApexBagState(StateNamespace namespace, StateTag> address, Coder coder) { - super(namespace, address, ListCoder.of(coder)); - } - - @Override - public ApexBagState readLater() { - return this; - } - - @Override - public List read() { - List value = super.readValue(); - if (value == null) { - value = new ArrayList<>(); - } - return value; - } - - @Override - public void add(T input) { - List value = read(); - value.add(input); - writeValue(value); - } - - @Override - public ReadableState isEmpty() { - return new ReadableState() { - @Override - public ReadableState readLater() { - return this; - } - - @Override - public Boolean read() { - return stateTable.get(namespace.stringKey(), address.getId()) == null; - } - }; - } - } - - /** - * Implementation of {@link StateInternals} that can be serialized and checkpointed with the - * operator. Suitable for small states, in the future this should be based on the incremental - * state saving components in the Apex library. 
- * - * @param key type - */ - @DefaultSerializer(JavaSerializer.class) - public static class ApexStateInternalsFactory - implements StateInternalsFactory, Serializable { - private static final long serialVersionUID = 1L; - /** Serializable state for internals (namespace to state tag to coded value). */ - private Map> perKeyState = new HashMap<>(); - - private final Coder keyCoder; - - private ApexStateInternalsFactory(Coder keyCoder) { - this.keyCoder = keyCoder; - } - - public Coder getKeyCoder() { - return this.keyCoder; - } - - @Override - public ApexStateInternals stateInternalsForKey(K key) { - final Slice keyBytes; - try { - keyBytes = - (key != null) - ? new Slice(CoderUtils.encodeToByteArray(keyCoder, key)) - : new Slice(null); - } catch (CoderException e) { - throw new RuntimeException(e); - } - HashBasedTable stateTable = - perKeyState.computeIfAbsent(keyBytes, k -> HashBasedTable.create()); - return new ApexStateInternals<>(key, stateTable); - } - } - - /** Factory to create the state internals. */ - public static class ApexStateBackend implements Serializable { - private static final long serialVersionUID = 1L; - - public ApexStateInternalsFactory newStateInternalsFactory(Coder keyCoder) { - return new ApexStateInternalsFactory<>(keyCoder); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStreamTuple.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStreamTuple.java deleted file mode 100644 index 3f1a9cf82860..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ApexStreamTuple.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.datatorrent.api.Operator; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.StructuredCoder; - -/** - * The common interface for all objects transmitted through streams. - * - * @param The actual payload type. - */ -public interface ApexStreamTuple { - /** - * Gets the value of the tuple. - * - * @return tuple - */ - T getValue(); - - /** - * Data tuple class. 
- * - * @param tuple type - */ - class DataTuple implements ApexStreamTuple { - private int unionTag; - private T value; - - public static DataTuple of(T value) { - return new DataTuple<>(value, 0); - } - - private DataTuple(T value, int unionTag) { - this.value = value; - this.unionTag = unionTag; - } - - @Override - public T getValue() { - return value; - } - - public void setValue(T value) { - this.value = value; - } - - public int getUnionTag() { - return unionTag; - } - - public void setUnionTag(int unionTag) { - this.unionTag = unionTag; - } - - @Override - public String toString() { - return value.toString(); - } - } - - /** - * Tuple that includes a timestamp. - * - * @param tuple type - */ - class TimestampedTuple extends DataTuple { - private long timestamp; - - public TimestampedTuple(long timestamp, T value) { - super(value, 0); - this.timestamp = timestamp; - } - - public long getTimestamp() { - return timestamp; - } - - public void setTimestamp(long timestamp) { - this.timestamp = timestamp; - } - - @Override - public int hashCode() { - return Objects.hash(timestamp); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof TimestampedTuple)) { - return false; - } else { - TimestampedTuple other = (TimestampedTuple) obj; - return (timestamp == other.timestamp) && Objects.equals(this.getValue(), other.getValue()); - } - } - } - - /** - * Tuple that represents a watermark. - * - * @param tuple type - */ - class WatermarkTuple extends TimestampedTuple { - public static WatermarkTuple of(long timestamp) { - return new WatermarkTuple<>(timestamp); - } - - protected WatermarkTuple(long timestamp) { - super(timestamp, null); - } - - @Override - public String toString() { - return "[Watermark " + getTimestamp() + "]"; - } - } - - /** Coder for {@link ApexStreamTuple}. 
*/ - class ApexStreamTupleCoder extends StructuredCoder> { - private static final long serialVersionUID = 1L; - final Coder valueCoder; - - public static ApexStreamTupleCoder of(Coder valueCoder) { - return new ApexStreamTupleCoder<>(valueCoder); - } - - protected ApexStreamTupleCoder(Coder valueCoder) { - this.valueCoder = checkNotNull(valueCoder); - } - - @Override - public void encode(ApexStreamTuple value, OutputStream outStream) - throws CoderException, IOException { - encode(value, outStream, Context.NESTED); - } - - @Override - public void encode(ApexStreamTuple value, OutputStream outStream, Context context) - throws CoderException, IOException { - if (value instanceof WatermarkTuple) { - outStream.write(1); - new DataOutputStream(outStream).writeLong(((WatermarkTuple) value).getTimestamp()); - } else { - outStream.write(0); - outStream.write(((DataTuple) value).unionTag); - valueCoder.encode(value.getValue(), outStream, context); - } - } - - @Override - public ApexStreamTuple decode(InputStream inStream) throws CoderException, IOException { - return decode(inStream, Context.NESTED); - } - - @Override - public ApexStreamTuple decode(InputStream inStream, Context context) - throws CoderException, IOException { - int b = inStream.read(); - if (b == 1) { - return new WatermarkTuple<>(new DataInputStream(inStream).readLong()); - } else { - int unionTag = inStream.read(); - return new DataTuple<>(valueCoder.decode(inStream, context), unionTag); - } - } - - @Override - public List> getCoderArguments() { - return Arrays.>asList(valueCoder); - } - - @Override - public void verifyDeterministic() throws NonDeterministicException { - verifyDeterministic( - this, - this.getClass().getSimpleName() + " requires a deterministic valueCoder", - valueCoder); - } - - /** Returns the value coder. */ - public Coder getValueCoder() { - return valueCoder; - } - } - - /** - * Central if data tuples received on and emitted from ports should be logged. 
Should be called in - * setup and value cached in operator. - */ - final class Logging { - public static boolean isDebugEnabled(ApexPipelineOptions options, Operator operator) { - return options.isTupleTracingEnabled(); - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/CoderAdapterStreamCodec.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/CoderAdapterStreamCodec.java deleted file mode 100644 index 3942e185742c..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/CoderAdapterStreamCodec.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.utils; - -import com.datatorrent.api.StreamCodec; -import com.datatorrent.netlet.util.Slice; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.Serializable; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.Coder.Context; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; - -/** The Apex {@link StreamCodec} adapter for using Beam {@link Coder}. */ -public class CoderAdapterStreamCodec implements StreamCodec, Serializable { - private static final long serialVersionUID = 1L; - private final Coder coder; - - public CoderAdapterStreamCodec(Coder coder) { - this.coder = coder; - } - - @VisibleForTesting - public Coder getCoder() { - return this.coder; - } - - @Override - public Object fromByteArray(Slice fragment) { - ByteArrayInputStream bis = - new ByteArrayInputStream(fragment.buffer, fragment.offset, fragment.length); - try { - return coder.decode(bis, Context.OUTER); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public Slice toByteArray(Object wv) { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - try { - coder.encode(wv, bos, Context.OUTER); - } catch (IOException e) { - throw new RuntimeException(e); - } - return new Slice(bos.toByteArray()); - } - - @Override - public int getPartition(Object o) { - return o.hashCode(); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/NoOpStepContext.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/NoOpStepContext.java deleted file mode 100644 index 9d5e998fb1be..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/NoOpStepContext.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import java.io.Serializable; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StepContext; -import org.apache.beam.runners.core.TimerInternals; - -/** Serializable {@link StepContext} that does nothing. */ -public class NoOpStepContext implements StepContext, Serializable { - private static final long serialVersionUID = 1L; - - @Override - public StateInternals stateInternals() { - throw new UnsupportedOperationException("stateInternals is not supported"); - } - - @Override - public TimerInternals timerInternals() { - throw new UnsupportedOperationException("timerInternals is not supported"); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/StateInternalsProxy.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/StateInternalsProxy.java deleted file mode 100644 index afd0b9000cc4..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/StateInternalsProxy.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import com.esotericsoftware.kryo.DefaultSerializer; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.Serializable; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsFactory; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateTag; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.state.State; -import org.apache.beam.sdk.state.StateContext; - -/** State internals for reusable processing context. 
*/ -@DefaultSerializer(JavaSerializer.class) -public class StateInternalsProxy implements StateInternals, Serializable { - - private final ApexStateInternals.ApexStateInternalsFactory factory; - private transient K currentKey; - - public StateInternalsProxy(ApexStateInternals.ApexStateInternalsFactory factory) { - this.factory = factory; - } - - public StateInternalsFactory getFactory() { - return this.factory; - } - - public Coder getKeyCoder() { - return factory.getKeyCoder(); - } - - public void setKey(K key) { - currentKey = key; - } - - @Override - public K getKey() { - return currentKey; - } - - @Override - public T state(StateNamespace namespace, StateTag address) { - return factory.stateInternalsForKey(currentKey).state(namespace, address); - } - - @Override - public T state( - StateNamespace namespace, StateTag address, StateContext c) { - return factory.stateInternalsForKey(currentKey).state(namespace, address, c); - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValueAndCoderKryoSerializable.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValueAndCoderKryoSerializable.java deleted file mode 100644 index 641fd1d892c6..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValueAndCoderKryoSerializable.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.KryoSerializable; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; -import com.esotericsoftware.kryo.serializers.JavaSerializer; -import java.io.IOException; -import org.apache.beam.sdk.coders.Coder; - -/** - * A {@link KryoSerializable} holder that uses the specified {@link Coder}. - * - * @param element type - */ -public class ValueAndCoderKryoSerializable implements KryoSerializable { - private static final JavaSerializer JAVA_SERIALIZER = new JavaSerializer(); - private T value; - private Coder coder; - - public ValueAndCoderKryoSerializable(T value, Coder coder) { - this.value = value; - this.coder = coder; - } - - @SuppressWarnings("unused") // for Kryo - private ValueAndCoderKryoSerializable() {} - - public T get() { - return value; - } - - @Override - public void write(Kryo kryo, Output output) { - try { - kryo.writeClass(output, coder.getClass()); - kryo.writeObject(output, coder, JAVA_SERIALIZER); - coder.encode(value, output); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void read(Kryo kryo, Input input) { - try { - @SuppressWarnings("unchecked") - Class> type = kryo.readClass(input).getType(); - coder = kryo.readObject(input, type, JAVA_SERIALIZER); - value = coder.decode(input); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git 
a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValuesSource.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValuesSource.java deleted file mode 100644 index fc9284c05570..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/ValuesSource.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import static org.apache.beam.sdk.io.UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; -import java.util.NoSuchElementException; -import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.Coder.Context; -import org.apache.beam.sdk.coders.IterableCoder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.joda.time.Instant; - -/** Unbounded source that reads from a Java {@link Iterable}. 
*/ -public class ValuesSource extends UnboundedSource { - private static final long serialVersionUID = 1L; - - private final byte[] codedValues; - private final IterableCoder iterableCoder; - - public ValuesSource(Iterable values, Coder coder) { - this.iterableCoder = IterableCoder.of(coder); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - try { - iterableCoder.encode(values, bos, Context.OUTER); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - this.codedValues = bos.toByteArray(); - } - - @Override - public java.util.List> split( - int desiredNumSplits, PipelineOptions options) throws Exception { - return Collections.singletonList(this); - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable CheckpointMark checkpointMark) { - ByteArrayInputStream bis = new ByteArrayInputStream(codedValues); - try { - Iterable values = this.iterableCoder.decode(bis, Context.OUTER); - return new ValuesReader<>(values, this); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - @Nullable - @Override - public Coder getCheckpointMarkCoder() { - return null; - } - - @Override - public Coder getOutputCoder() { - return iterableCoder.getElemCoder(); - } - - private static class ValuesReader extends UnboundedReader { - - private final Iterable values; - private final UnboundedSource source; - private transient Iterator iterator; - private T current; - - public ValuesReader(Iterable values, UnboundedSource source) { - this.values = values; - this.source = source; - } - - @Override - public boolean start() throws IOException { - if (null == iterator) { - iterator = values.iterator(); - } - return advance(); - } - - @Override - public boolean advance() throws IOException { - if (iterator.hasNext()) { - current = iterator.next(); - return true; - } else { - return false; - } - } - - @Override - public T getCurrent() throws NoSuchElementException { - return current; - } - - @Override - public Instant 
getCurrentTimestamp() throws NoSuchElementException { - return Instant.now(); - } - - @Override - public void close() throws IOException {} - - @Override - public Instant getWatermark() { - return Instant.now(); - } - - @Override - public CheckpointMark getCheckpointMark() { - return NOOP_CHECKPOINT_MARK; - } - - @Override - public UnboundedSource getCurrentSource() { - return source; - } - } -} diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/package-info.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/package-info.java deleted file mode 100644 index 387c2cccc9ce..000000000000 --- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/utils/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Implementation of the Beam runner for Apache Apex. 
*/ -package org.apache.beam.runners.apex.translation.utils; diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerRegistrarTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerRegistrarTest.java deleted file mode 100644 index ceab279130a3..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerRegistrarTest.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import static org.junit.Assert.assertEquals; - -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.junit.Test; - -/** Tests the proper registration of the Apex runner. 
*/ -public class ApexRunnerRegistrarTest { - - @Test - public void testFullName() { - String[] args = new String[] {String.format("--runner=%s", ApexRunner.class.getName())}; - PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create(); - assertEquals(opts.getRunner(), ApexRunner.class); - } - - @Test - public void testClassName() { - String[] args = new String[] {String.format("--runner=%s", ApexRunner.class.getSimpleName())}; - PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create(); - assertEquals(opts.getRunner(), ApexRunner.class); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java deleted file mode 100644 index 5d5e3a37c264..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexRunnerTest.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex; - -import com.datatorrent.api.DAG; -import com.datatorrent.api.DAG.Locality; -import com.datatorrent.api.DAG.OperatorMeta; -import com.datatorrent.stram.engine.OperatorContext; -import java.io.File; -import java.io.FileOutputStream; -import java.util.Properties; -import java.util.Set; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.hamcrest.Matchers; -import org.junit.Assert; -import org.junit.Test; - -/** Tests for the Apex runner. */ -public class ApexRunnerTest { - - @Test - public void testConfigProperties() throws Exception { - String operName = "testProperties"; - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - - // default configuration from class path - Pipeline p = Pipeline.create(); - Create.Values empty = Create.empty(VoidCoder.of()); - p.apply(operName, empty); - - DAG dag = TestApexRunner.translate(p, options); - OperatorMeta t1Meta = dag.getOperatorMeta(operName); - Assert.assertNotNull(t1Meta); - Assert.assertEquals(Integer.valueOf(32), t1Meta.getValue(OperatorContext.MEMORY_MB)); - - File tmp = File.createTempFile("beam-runners-apex-", ".properties"); - tmp.deleteOnExit(); - Properties props = new Properties(); - props.setProperty("apex.operator." 
+ operName + ".attr.MEMORY_MB", "64"); - try (FileOutputStream fos = new FileOutputStream(tmp)) { - props.store(fos, ""); - } - options.setConfigFile(tmp.getAbsolutePath()); - dag = TestApexRunner.translate(p, options); - tmp.delete(); - - t1Meta = dag.getOperatorMeta(operName); - Assert.assertNotNull(t1Meta); - Assert.assertEquals(Integer.valueOf(64), t1Meta.getValue(OperatorContext.MEMORY_MB)); - } - - @Test - public void testParDoChaining() throws Exception { - Pipeline p = Pipeline.create(); - long numElements = 1000; - PCollection input = p.apply(GenerateSequence.from(0).to(numElements)); - PAssert.thatSingleton(input.apply("Count", Count.globally())).isEqualTo(numElements); - - ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class); - DAG dag = TestApexRunner.translate(p, options); - - String[] expectedThreadLocal = {"/GroupGlobally/RewindowActuals/Window.Assign"}; - Set actualThreadLocal = Sets.newHashSet(); - for (DAG.StreamMeta sm : dag.getAllStreamsMeta()) { - DAG.OutputPortMeta opm = sm.getSource(); - if (sm.getLocality() == Locality.THREAD_LOCAL) { - String name = opm.getOperatorMeta().getName(); - String prefix = "PAssert$"; - if (name.startsWith(prefix)) { - // remove indeterministic prefix - name = name.substring(prefix.length() + 1); - } - actualThreadLocal.add(name); - } - } - Assert.assertThat(actualThreadLocal, Matchers.hasItems(expectedThreadLocal)); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java deleted file mode 100644 index 7df1e2bcf483..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/ApexYarnLauncherTest.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex; - -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.not; -import static org.junit.Assert.assertThat; - -import com.datatorrent.api.Attribute; -import com.datatorrent.api.Attribute.AttributeMap; -import com.datatorrent.api.Context.DAGContext; -import com.datatorrent.api.DAG; -import com.datatorrent.api.StreamingApplication; -import java.io.File; -import java.net.URI; -import java.nio.charset.StandardCharsets; -import java.nio.file.FileSystem; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.jar.JarFile; -import org.apache.apex.api.EmbeddedAppLauncher; -import org.apache.apex.api.Launcher; -import org.apache.apex.api.Launcher.AppHandle; -import org.apache.apex.api.Launcher.LaunchMode; -import org.apache.hadoop.conf.Configuration; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -/** Test for dependency resolution for pipeline execution on YARN. 
*/ -public class ApexYarnLauncherTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - - @Test - public void testGetYarnDeployDependencies() throws Exception { - List deps = ApexYarnLauncher.getYarnDeployDependencies(); - String depsToString = deps.toString(); - // the beam dependencies are not present as jar when running within the Maven build reactor - // assertThat(depsToString, containsString("beam-runners-core-")); - // assertThat(depsToString, containsString("beam-runners-apex-")); - assertThat(depsToString, containsString("apex-common-")); - assertThat(depsToString, not(containsString("hadoop-"))); - assertThat(depsToString, not(containsString("zookeeper-"))); - } - - @Test - public void testProxyLauncher() throws Exception { - // use the embedded launcher to build the DAG only - EmbeddedAppLauncher embeddedLauncher = Launcher.getLauncher(LaunchMode.EMBEDDED); - - StreamingApplication app = - (dag, conf) -> dag.setAttribute(DAGContext.APPLICATION_NAME, "DummyApp"); - - Configuration conf = new Configuration(false); - DAG dag = embeddedLauncher.prepareDAG(app, conf); - Attribute.AttributeMap launchAttributes = new Attribute.AttributeMap.DefaultAttributeMap(); - Properties configProperties = new Properties(); - ApexYarnLauncher launcher = new ApexYarnLauncher(); - launcher.launchApp(new MockApexYarnLauncherParams(dag, launchAttributes, configProperties)); - } - - private static class MockApexYarnLauncherParams extends ApexYarnLauncher.LaunchParams { - private static final long serialVersionUID = 1L; - - public MockApexYarnLauncherParams( - DAG dag, AttributeMap launchAttributes, Properties properties) { - super(dag, launchAttributes, properties); - } - - @Override - protected Launcher getApexLauncher() { - return new Launcher() { - @Override - public AppHandle launchApp( - StreamingApplication application, - Configuration configuration, - AttributeMap launchParameters) - throws Launcher.LauncherException { - EmbeddedAppLauncher 
embeddedLauncher = Launcher.getLauncher(LaunchMode.EMBEDDED); - DAG dag = embeddedLauncher.getDAG(); - application.populateDAG(dag, new Configuration(false)); - String appName = dag.getValue(DAGContext.APPLICATION_NAME); - Assert.assertEquals("DummyApp", appName); - return new AppHandle() { - @Override - public boolean isFinished() { - return true; - } - - @Override - public void shutdown(Launcher.ShutdownMode arg0) {} - }; - } - }; - } - } - - @Test - public void testCreateJar() throws Exception { - File baseDir = tmpFolder.newFolder("target", "testCreateJar"); - File srcDir = tmpFolder.newFolder("target", "testCreateJar", "src"); - String file1 = "file1"; - Files.write(new File(srcDir, file1).toPath(), "file1".getBytes(StandardCharsets.UTF_8)); - - File jarFile = new File(baseDir, "test.jar"); - ApexYarnLauncher.createJar(srcDir, jarFile); - Assert.assertTrue("exists: " + jarFile, jarFile.exists()); - URI uri = URI.create("jar:" + jarFile.toURI()); - final Map env = Collections.singletonMap("create", "true"); - try (final FileSystem zipfs = FileSystems.newFileSystem(uri, env)) { - Assert.assertTrue("manifest", Files.isRegularFile(zipfs.getPath(JarFile.MANIFEST_NAME))); - Assert.assertTrue("file1", Files.isRegularFile(zipfs.getPath(file1))); - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/UnboundedTextSource.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/UnboundedTextSource.java deleted file mode 100644 index dae9a3016778..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/UnboundedTextSource.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.examples; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Collections; -import java.util.List; -import java.util.NoSuchElementException; -import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.joda.time.Instant; - -/** unbounded source that reads from text. */ -public class UnboundedTextSource extends UnboundedSource { - private static final long serialVersionUID = 1L; - - @Override - public List> split( - int desiredNumSplits, PipelineOptions options) throws Exception { - return Collections.>singletonList(this); - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable CheckpointMark checkpointMark) { - return new UnboundedTextReader(this); - } - - @Nullable - @Override - public Coder getCheckpointMarkCoder() { - return null; - } - - @Override - public Coder getOutputCoder() { - return StringUtf8Coder.of(); - } - - /** reads from text. 
*/ - public static class UnboundedTextReader extends UnboundedReader implements Serializable { - - private static final long serialVersionUID = 7526472295622776147L; - - private final UnboundedTextSource source; - - private final String[] texts = new String[] {"foo foo foo bar bar", "foo foo bar bar bar"}; - private long index = 0; - - private String currentRecord; - - private Instant currentTimestamp; - - public UnboundedTextReader(UnboundedTextSource source) { - this.source = source; - } - - @Override - public boolean start() throws IOException { - currentRecord = texts[0]; - currentTimestamp = new Instant(0); - return true; - } - - @Override - public boolean advance() throws IOException { - index++; - currentRecord = texts[(int) index % (texts.length)]; - currentTimestamp = new Instant(index * 1000); - try { - Thread.sleep(index); // allow for downstream processing to complete - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - return true; - } - - @Override - public byte[] getCurrentRecordId() throws NoSuchElementException { - return new byte[0]; - } - - @Override - public String getCurrent() throws NoSuchElementException { - return this.currentRecord; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - return currentTimestamp; - } - - @Override - public void close() throws IOException {} - - @Override - public Instant getWatermark() { - return currentTimestamp; - } - - @Override - public CheckpointMark getCheckpointMark() { - return null; - } - - @Override - public UnboundedSource getCurrentSource() { - return this.source; - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java deleted file mode 100644 index d1ba0ac0d4f6..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.examples; - -import java.io.File; -import java.nio.file.Files; -import java.util.HashSet; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.ApexRunnerResult; -import org.apache.beam.runners.apex.TestApexRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.Read; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.joda.time.Duration; -import org.junit.Assert; -import org.junit.Test; - -/** Windowed word count example on Apex runner. */ -public class WordCountTest { - - static class FormatAsStringFn extends DoFn, String> { - private static final long serialVersionUID = 1L; - - @ProcessElement - public void processElement(ProcessContext c) { - String row = - c.element().getKey() + " - " + c.element().getValue() + " @ " + c.timestamp().toString(); - c.output(row); - } - } - - static class ExtractWordsFn extends DoFn { - private static final long serialVersionUID = 1L; - private final Counter emptyLines = Metrics.counter("main", "emptyLines"); - - @SuppressWarnings("StringSplitter") - @ProcessElement - public void processElement(ProcessContext c) { - if (c.element().trim().isEmpty()) { - emptyLines.inc(1); - } - - // Split the line into words. - String[] words = c.element().split("[^a-zA-Z']+"); - - // Output each word encountered into the output PCollection. - for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - /** Options for word count example. 
*/ - public interface WordCountOptions extends ApexPipelineOptions { - @Description("Path of the file to read from") - @Validation.Required - String getInputFile(); - - void setInputFile(String value); - - @Description("Path of the file to write to") - @Validation.Required - String getOutput(); - - void setOutput(String value); - } - - static void runWordCount(WordCountOptions options) { - Pipeline p = Pipeline.create(options); - p.apply("ReadLines", TextIO.read().from(options.getInputFile())) - .apply(ParDo.of(new ExtractWordsFn())) - .apply(Count.perElement()) - .apply(ParDo.of(new FormatAsStringFn())) - .apply("WriteCounts", TextIO.write().to(options.getOutput())); - p.run().waitUntilFinish(); - } - - public static void main(String[] args) { - WordCountOptions options = - PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class); - - runWordCount(options); - } - - @Test - public void testWordCountExample() throws Exception { - PipelineOptionsFactory.register(WordCountOptions.class); - WordCountOptions options = TestPipeline.testingPipelineOptions().as(WordCountOptions.class); - options.setRunner(TestApexRunner.class); - options.setApplicationName("StreamingWordCount"); - String inputFile = WordCountTest.class.getResource("/words.txt").getFile(); - options.setInputFile(new File(inputFile).getAbsolutePath()); - String outputFilePrefix = "target/wordcountresult.txt"; - options.setOutput(outputFilePrefix); - - File outFile1 = new File(outputFilePrefix + "-00000-of-00002"); - File outFile2 = new File(outputFilePrefix + "-00001-of-00002"); - Assert.assertTrue(!outFile1.exists() || outFile1.delete()); - Assert.assertTrue(!outFile2.exists() || outFile2.delete()); - - WordCountTest.runWordCount(options); - - Assert.assertTrue("result files exist", outFile1.exists() && outFile2.exists()); - HashSet results = new HashSet<>(); - results.addAll(Files.readAllLines(outFile1.toPath())); - results.addAll(Files.readAllLines(outFile2.toPath())); - HashSet 
expectedOutput = - Sets.newHashSet( - "foo - 5 @ 294247-01-09T04:00:54.775Z", "bar - 5 @ 294247-01-09T04:00:54.775Z"); - Assert.assertEquals("expected output", expectedOutput, results); - } - - static class CollectResultsFn extends DoFn, String> { - static final ConcurrentHashMap RESULTS = new ConcurrentHashMap<>(); - - @ProcessElement - public void processElement(ProcessContext c) { - RESULTS.put(c.element().getKey(), c.element().getValue()); - } - } - - @Test - public void testWindowedWordCount() throws Exception { - String[] args = new String[] {"--runner=" + ApexRunner.class.getName()}; - ApexPipelineOptions options = - PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class); - options.setApplicationName("StreamingWordCount"); - Pipeline p = Pipeline.create(options); - - PCollection> wordCounts = - p.apply(Read.from(new UnboundedTextSource())) - .apply(ParDo.of(new ExtractWordsFn())) - .apply(Window.into(FixedWindows.of(Duration.standardSeconds(10)))) - .apply(Count.perElement()); - - wordCounts.apply(ParDo.of(new CollectResultsFn())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)")); - long timeout = System.currentTimeMillis() + 30000; - while (System.currentTimeMillis() < timeout) { - if (CollectResultsFn.RESULTS.containsKey("foo") - && CollectResultsFn.RESULTS.containsKey("bar")) { - break; - } - result.waitUntilFinish(Duration.millis(1000)); - } - result.cancel(); - Assert.assertTrue( - CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar")); - CollectResultsFn.RESULTS.clear(); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/package-info.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/package-info.java deleted file mode 100644 index ef8bae366d03..000000000000 --- 
a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Implementation of the Beam runner for Apache Apex. */ -package org.apache.beam.runners.apex.examples; diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ApexGroupByKeyOperatorTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ApexGroupByKeyOperatorTest.java deleted file mode 100644 index 40bdfe0e99ef..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ApexGroupByKeyOperatorTest.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation; - -import com.datatorrent.api.Sink; -import com.datatorrent.lib.util.KryoCloneUtils; -import java.util.List; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.TestApexRunner; -import org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.IntervalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollection.IsBounded; -import org.apache.beam.sdk.values.WindowingStrategy; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Assert; -import org.junit.Test; - -/** Test for {@link ApexGroupByKeyOperator}. 
*/ -public class ApexGroupByKeyOperatorTest { - - @Test - public void testGlobalWindowMinTimestamp() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setRunner(TestApexRunner.class); - Pipeline pipeline = Pipeline.create(options); - - WindowingStrategy ws = - WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10))); - PCollection> input = - PCollection.createPrimitiveOutputInternal( - pipeline, ws, IsBounded.BOUNDED, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())); - - ApexGroupByKeyOperator operator = - new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend()); - - operator.setup(null); - operator.beginWindow(1); - Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator)); - - final List results = Lists.newArrayList(); - Sink sink = - new Sink() { - @Override - public void put(Object tuple) { - results.add(tuple); - } - - @Override - public int getCount(boolean reset) { - return 0; - } - }; - operator.output.setSink(sink); - operator.setup(null); - operator.beginWindow(1); - - Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE; - BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000)); - PaneInfo paneInfo = PaneInfo.NO_FIRING; - - WindowedValue> wv1 = - WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo); - operator.input.process(ApexStreamTuple.DataTuple.of(wv1)); - - WindowedValue> wv2 = - WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo); - operator.input.process(ApexStreamTuple.DataTuple.of(wv2)); - - ApexStreamTuple>> watermark = - ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()); - - Assert.assertEquals("number outputs", 0, results.size()); - operator.input.process(watermark); - Assert.assertEquals("number outputs", 2, results.size()); - @SuppressWarnings({"unchecked", "rawtypes"}) - ApexStreamTuple.DataTuple>>> dataTuple = 
- (ApexStreamTuple.DataTuple) results.get(0); - List counts = Lists.newArrayList(1, 1); - Assert.assertEquals("iterable", KV.of("foo", counts), dataTuple.getValue().getValue()); - Assert.assertEquals("expected watermark", watermark, results.get(1)); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslatorTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslatorTest.java deleted file mode 100644 index c36491a949be..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/FlattenPCollectionTranslatorTest.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import com.datatorrent.api.DAG; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Set; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.ApexRunnerResult; -import org.apache.beam.runners.apex.TestApexRunner; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionList; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Integration test for {@link FlattenPCollectionTranslator}. 
*/ -public class FlattenPCollectionTranslatorTest { - private static final Logger LOG = LoggerFactory.getLogger(FlattenPCollectionTranslatorTest.class); - - @Test - public void test() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class); - options.setRunner(ApexRunner.class); - Pipeline p = Pipeline.create(options); - - String[][] collections = {{"1"}, {"2"}, {"3"}, {"4"}, {"5"}}; - - Set expected = Sets.newHashSet(); - List> pcList = new ArrayList<>(); - for (String[] collection : collections) { - pcList.add( - p.apply(Create.of(ImmutableList.copyOf(collection)).withCoder(StringUtf8Coder.of()))); - expected.addAll(Arrays.asList(collection)); - } - - PCollection actual = PCollectionList.of(pcList).apply(Flatten.pCollections()); - actual.apply(ParDo.of(new EmbeddedCollector())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - // TODO: verify translation - result.getApexDAG(); - long timeout = System.currentTimeMillis() + 30000; - while (System.currentTimeMillis() < timeout - && EmbeddedCollector.RESULTS.size() < expected.size()) { - LOG.info("Waiting for expected results."); - Thread.sleep(500); - } - - Assert.assertEquals("number results", expected.size(), EmbeddedCollector.RESULTS.size()); - Assert.assertEquals(expected, Sets.newHashSet(EmbeddedCollector.RESULTS)); - } - - private static class EmbeddedCollector extends DoFn { - private static final List RESULTS = Collections.synchronizedList(new ArrayList<>()); - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - RESULTS.add(c.element()); - } - } - - @Test - public void testFlattenSingleCollection() { - ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class); - Pipeline p = Pipeline.create(); - PCollection single = p.apply(Create.of(Collections.singletonList("1"))); - PCollectionList.of(single) - .apply(Flatten.pCollections()) - .apply(ParDo.of(new EmbeddedCollector())); - DAG dag = 
TestApexRunner.translate(p, options); - Assert.assertNotNull( - dag.getOperatorMeta("ParDo(EmbeddedCollector)/ParMultiDo(EmbeddedCollector)")); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslatorTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslatorTest.java deleted file mode 100644 index 11a732723544..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/GroupByKeyTranslatorTest.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Set; -import javax.annotation.Nullable; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.ApexRunnerResult; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.Read; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.windowing.FixedWindows; -import org.apache.beam.sdk.transforms.windowing.TimestampCombiner; -import org.apache.beam.sdk.transforms.windowing.Window; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Assert; -import org.junit.Test; - -/** Integration test for {@link GroupByKeyTranslator}. 
*/ -public class GroupByKeyTranslatorTest { - - @SuppressWarnings({"unchecked"}) - @Test - public void test() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class); - options.setApplicationName("GroupByKey"); - options.setRunner(ApexRunner.class); - Pipeline p = Pipeline.create(options); - - List> data = - Lists.newArrayList( - KV.of("foo", new Instant(1000)), - KV.of("foo", new Instant(1000)), - KV.of("foo", new Instant(2000)), - KV.of("bar", new Instant(1000)), - KV.of("bar", new Instant(2000)), - KV.of("bar", new Instant(2000))); - - // expected results assume outputAtLatestInputTimestamp - List>> expected = - Lists.newArrayList( - KV.of(new Instant(1000), KV.of("foo", 2L)), - KV.of(new Instant(1000), KV.of("bar", 1L)), - KV.of(new Instant(2000), KV.of("foo", 1L)), - KV.of(new Instant(2000), KV.of("bar", 2L))); - - p.apply(Read.from(new TestSource(data, new Instant(5000)))) - .apply( - Window.into(FixedWindows.of(Duration.standardSeconds(1))) - .withTimestampCombiner(TimestampCombiner.LATEST)) - .apply(Count.perElement()) - .apply(ParDo.of(new KeyedByTimestamp<>())) - .apply(ParDo.of(new EmbeddedCollector())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - result.getApexDAG(); - - long timeout = System.currentTimeMillis() + 30000; - while (System.currentTimeMillis() < timeout) { - if (EmbeddedCollector.RESULTS.containsAll(expected)) { - break; - } - Thread.sleep(1000); - } - Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS); - } - - private static class EmbeddedCollector extends DoFn { - private static final Set RESULTS = Collections.synchronizedSet(new HashSet<>()); - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - RESULTS.add(c.element()); - } - } - - private static class KeyedByTimestamp extends DoFn> { - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - c.output(KV.of(c.timestamp(), c.element())); - } 
- } - - private static class TestSource extends UnboundedSource { - - private final List> data; - private final Instant watermark; - - public TestSource(List> data, Instant watermark) { - this.data = data; - this.watermark = watermark; - } - - @Override - public List> split( - int desiredNumSplits, PipelineOptions options) throws Exception { - return Collections.>singletonList(this); - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable CheckpointMark checkpointMark) { - return new TestReader(data, watermark, this); - } - - @Nullable - @Override - public Coder getCheckpointMarkCoder() { - return null; - } - - @Override - public Coder getOutputCoder() { - return StringUtf8Coder.of(); - } - - private static class TestReader extends UnboundedReader implements Serializable { - - private static final long serialVersionUID = 7526472295622776147L; - - private final List> data; - private final TestSource source; - - private Iterator> iterator; - private String currentRecord; - private Instant currentTimestamp; - private Instant watermark; - private boolean collected; - - public TestReader(List> data, Instant watermark, TestSource source) { - this.data = data; - this.source = source; - this.watermark = watermark; - } - - @Override - public boolean start() throws IOException { - iterator = data.iterator(); - return advance(); - } - - @Override - public boolean advance() throws IOException { - if (iterator.hasNext()) { - KV kv = iterator.next(); - collected = false; - currentRecord = kv.getKey(); - currentTimestamp = kv.getValue(); - return true; - } else { - return false; - } - } - - @Override - public byte[] getCurrentRecordId() throws NoSuchElementException { - return new byte[0]; - } - - @Override - public String getCurrent() throws NoSuchElementException { - collected = true; - return this.currentRecord; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - return currentTimestamp; - } - - 
@Override - public void close() throws IOException {} - - @Override - public Instant getWatermark() { - if (!iterator.hasNext() && collected) { - return watermark; - } else { - return new Instant(0); - } - } - - @Override - public CheckpointMark getCheckpointMark() { - return null; - } - - @Override - public UnboundedSource getCurrentSource() { - return this.source; - } - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ParDoTranslatorTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ParDoTranslatorTest.java deleted file mode 100644 index 576172ab4be0..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ParDoTranslatorTest.java +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import static org.apache.beam.sdk.testing.PCollectionViewTesting.materializeValuesFor; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import com.datatorrent.api.DAG; -import com.datatorrent.api.Sink; -import com.datatorrent.lib.util.KryoCloneUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.regex.Pattern; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.ApexRunnerResult; -import org.apache.beam.runners.apex.TestApexRunner; -import org.apache.beam.runners.apex.translation.operators.ApexParDoOperator; -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.runners.apex.translation.utils.ApexStateInternals; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.SerializableCoder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.DoFnSchemaInformation; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.sdk.values.WindowingStrategy; 
-import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** integration test for {@link ParDoTranslator}. */ -@RunWith(JUnit4.class) -public class ParDoTranslatorTest { - private static final Logger LOG = LoggerFactory.getLogger(ParDoTranslatorTest.class); - private static final long SLEEP_MILLIS = 500; - private static final long TIMEOUT_MILLIS = 30000; - - @Test - public void test() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setApplicationName("ParDoBound"); - options.setRunner(ApexRunner.class); - - Pipeline p = Pipeline.create(options); - - List collection = Lists.newArrayList(1, 2, 3, 4, 5); - List expected = Lists.newArrayList(6, 7, 8, 9, 10); - p.apply(Create.of(collection).withCoder(SerializableCoder.of(Integer.class))) - .apply(ParDo.of(new Add(5))) - .apply(ParDo.of(new EmbeddedCollector())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - DAG dag = result.getApexDAG(); - - DAG.OperatorMeta om = dag.getOperatorMeta("Create.Values"); - Assert.assertNotNull(om); - Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class); - - om = dag.getOperatorMeta("ParDo(Add)/ParMultiDo(Add)"); - Assert.assertNotNull(om); - Assert.assertEquals(om.getOperator().getClass(), ApexParDoOperator.class); - - long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS; - while (System.currentTimeMillis() < timeout) { - if (EmbeddedCollector.RESULTS.containsAll(expected)) { - break; - } - LOG.info("Waiting for expected results."); - Thread.sleep(SLEEP_MILLIS); - } - Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS); - } - - private 
static class Add extends DoFn { - private static final long serialVersionUID = 1L; - private Integer number; - private PCollectionView sideInputView; - - private Add(Integer number) { - this.number = number; - } - - private Add(PCollectionView sideInputView) { - this.sideInputView = sideInputView; - } - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - if (sideInputView != null) { - number = c.sideInput(sideInputView); - } - c.output(c.element() + number); - } - } - - private static class EmbeddedCollector extends DoFn { - private static final long serialVersionUID = 1L; - private static final Set RESULTS = Collections.synchronizedSet(new HashSet<>()); - - public EmbeddedCollector() { - RESULTS.clear(); - } - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - RESULTS.add(c.element()); - } - } - - private static Throwable runExpectingAssertionFailure(Pipeline pipeline) { - // We cannot use thrown.expect(AssertionError.class) because the AssertionError - // is first caught by JUnit and causes a test failure. - try { - pipeline.run(); - } catch (AssertionError exc) { - return exc; - } - fail("assertion should have failed"); - throw new RuntimeException("unreachable"); - } - - @Test - @Ignore("https://issues.apache.org/jira/browse/BEAM-3272") - public void testAssertionFailure() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setRunner(TestApexRunner.class); - Pipeline pipeline = Pipeline.create(options); - - PCollection pcollection = pipeline.apply(Create.of(1, 2, 3, 4)); - PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3, 7); - - Throwable exc = runExpectingAssertionFailure(pipeline); - Pattern expectedPattern = - Pattern.compile( - "Expected: iterable over \\[((<4>|<7>|<3>|<2>|<1>)(, )?){5}\\] in any order"); - // A loose pattern, but should get the job done. 
- assertTrue( - "Expected error message from PAssert with substring matching " - + expectedPattern - + " but the message was \"" - + exc.getMessage() - + "\"", - expectedPattern.matcher(exc.getMessage()).find()); - } - - @Test - public void testContainsInAnyOrder() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setRunner(TestApexRunner.class); - Pipeline pipeline = Pipeline.create(options); - PCollection pcollection = pipeline.apply(Create.of(1, 2, 3, 4)); - PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3); - // TODO: terminate faster based on processed assertion vs. auto-shutdown - pipeline.run(); - } - - @Test - public void testSerialization() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setRunner(TestApexRunner.class); - Pipeline pipeline = Pipeline.create(options); - - PCollectionView singletonView = - pipeline.apply(Create.of(1)).apply(Sum.integersGlobally().asSingletonView()); - - ApexParDoOperator operator = - new ApexParDoOperator<>( - options, - new Add(singletonView), - new TupleTag<>(), - TupleTagList.empty().getAll(), - WindowingStrategy.globalDefault(), - Collections.singletonList(singletonView), - VarIntCoder.of(), - Collections.emptyMap(), - DoFnSchemaInformation.create(), - Collections.emptyMap(), - new ApexStateInternals.ApexStateBackend()); - operator.setup(null); - operator.beginWindow(0); - WindowedValue wv1 = WindowedValue.valueInGlobalWindow(1); - WindowedValue> sideInput = - WindowedValue.valueInGlobalWindow(materializeValuesFor(options, View.asSingleton(), 22)); - operator.input.process(ApexStreamTuple.DataTuple.of(wv1)); // pushed back input - - final List results = Lists.newArrayList(); - Sink sink = - new Sink() { - @Override - public void put(Object tuple) { - results.add(tuple); - } - - @Override - public int getCount(boolean reset) { - return 0; - } - }; - - // verify pushed 
back input checkpointing - Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator)); - operator.output.setSink(sink); - operator.setup(null); - operator.beginWindow(1); - WindowedValue wv2 = WindowedValue.valueInGlobalWindow(2); - operator.sideInput1.process(ApexStreamTuple.DataTuple.of(sideInput)); - Assert.assertEquals("number outputs", 1, results.size()); - Assert.assertEquals( - "result", - WindowedValue.valueInGlobalWindow(23), - ((ApexStreamTuple.DataTuple) results.get(0)).getValue()); - - // verify side input checkpointing - results.clear(); - Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator)); - operator.output.setSink(sink); - operator.setup(null); - operator.beginWindow(2); - operator.input.process(ApexStreamTuple.DataTuple.of(wv2)); - Assert.assertEquals("number outputs", 1, results.size()); - Assert.assertEquals( - "result", - WindowedValue.valueInGlobalWindow(24), - ((ApexStreamTuple.DataTuple) results.get(0)).getValue()); - } - - @Test - public void testMultiOutputParDoWithSideInputs() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - options.setRunner(ApexRunner.class); // non-blocking run - Pipeline pipeline = Pipeline.create(options); - - List inputs = Arrays.asList(3, -42, 666); - final TupleTag mainOutputTag = new TupleTag<>("main"); - final TupleTag additionalOutputTag = new TupleTag<>("output"); - - PCollectionView sideInput1 = - pipeline - .apply("CreateSideInput1", Create.of(11)) - .apply("ViewSideInput1", View.asSingleton()); - PCollectionView sideInputUnread = - pipeline - .apply("CreateSideInputUnread", Create.of(-3333)) - .apply("ViewSideInputUnread", View.asSingleton()); - PCollectionView sideInput2 = - pipeline - .apply("CreateSideInput2", Create.of(222)) - .apply("ViewSideInput2", View.asSingleton()); - - PCollectionTuple outputs = - pipeline - .apply(Create.of(inputs)) - .apply( - ParDo.of( - new 
TestMultiOutputWithSideInputsFn( - Arrays.asList(sideInput1, sideInput2), Arrays.asList())) - .withSideInputs(sideInput1) - .withSideInputs(sideInputUnread) - .withSideInputs(sideInput2) - .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag))); - - outputs.get(mainOutputTag).apply(ParDo.of(new EmbeddedCollector())); - outputs.get(additionalOutputTag).setCoder(VoidCoder.of()); - ApexRunnerResult result = (ApexRunnerResult) pipeline.run(); - - HashSet expected = - Sets.newHashSet( - "processing: 3: [11, 222]", "processing: -42: [11, 222]", "processing: 666: [11, 222]"); - long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS; - while (System.currentTimeMillis() < timeout) { - if (EmbeddedCollector.RESULTS.containsAll(expected)) { - break; - } - LOG.info("Waiting for expected results."); - Thread.sleep(SLEEP_MILLIS); - } - result.cancel(); - Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS); - } - - private static class TestMultiOutputWithSideInputsFn extends DoFn { - private static final long serialVersionUID = 1L; - - final List> sideInputViews = new ArrayList<>(); - final List> additionalOutputTupleTags = new ArrayList<>(); - - public TestMultiOutputWithSideInputsFn( - List> sideInputViews, - List> additionalOutputTupleTags) { - this.sideInputViews.addAll(sideInputViews); - this.additionalOutputTupleTags.addAll(additionalOutputTupleTags); - } - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - outputToAllWithSideInputs(c, "processing: " + c.element()); - } - - private void outputToAllWithSideInputs(ProcessContext c, String value) { - if (!sideInputViews.isEmpty()) { - List sideInputValues = new ArrayList<>(); - for (PCollectionView sideInputView : sideInputViews) { - sideInputValues.add(c.sideInput(sideInputView)); - } - value += ": " + sideInputValues; - } - c.output(value); - for (TupleTag additionalOutputTupleTag : additionalOutputTupleTags) { - c.output(additionalOutputTupleTag, 
additionalOutputTupleTag.getId() + ": " + value); - } - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ReadUnboundTranslatorTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ReadUnboundTranslatorTest.java deleted file mode 100644 index 4c7dc26ec67d..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/ReadUnboundTranslatorTest.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import com.datatorrent.api.DAG; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.ApexRunner; -import org.apache.beam.runners.apex.ApexRunnerResult; -import org.apache.beam.runners.apex.translation.operators.ApexReadUnboundedInputOperator; -import org.apache.beam.runners.apex.translation.utils.CollectionSource; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.Read; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ContiguousSet; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.DiscreteDomain; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Range; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.junit.Assert; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** integration test for {@link ReadUnboundedTranslator}. 
*/ -public class ReadUnboundTranslatorTest { - private static final Logger LOG = LoggerFactory.getLogger(ReadUnboundTranslatorTest.class); - - @Test - public void test() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - EmbeddedCollector.RESULTS.clear(); - options.setApplicationName("ReadUnbound"); - options.setRunner(ApexRunner.class); - Pipeline p = Pipeline.create(options); - - List collection = Lists.newArrayList("1", "2", "3", "4", "5"); - CollectionSource source = new CollectionSource<>(collection, StringUtf8Coder.of()); - p.apply(Read.from(source)).apply(ParDo.of(new EmbeddedCollector())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - DAG dag = result.getApexDAG(); - DAG.OperatorMeta om = dag.getOperatorMeta("Read(CollectionSource)"); - Assert.assertNotNull(om); - Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class); - - long timeout = System.currentTimeMillis() + 30000; - while (System.currentTimeMillis() < timeout) { - if (EmbeddedCollector.RESULTS.containsAll(collection)) { - break; - } - LOG.info("Waiting for expected results."); - Thread.sleep(1000); - } - Assert.assertEquals(Sets.newHashSet(collection), EmbeddedCollector.RESULTS); - } - - @Test - public void testReadBounded() throws Exception { - ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); - EmbeddedCollector.RESULTS.clear(); - options.setApplicationName("ReadBounded"); - options.setRunner(ApexRunner.class); - Pipeline p = Pipeline.create(options); - - Set expected = ContiguousSet.create(Range.closedOpen(0L, 10L), DiscreteDomain.longs()); - p.apply(GenerateSequence.from(0).to(10)).apply(ParDo.of(new EmbeddedCollector())); - - ApexRunnerResult result = (ApexRunnerResult) p.run(); - DAG dag = result.getApexDAG(); - String operatorName = "GenerateSequence/Read(BoundedCountingSource)"; - DAG.OperatorMeta om = dag.getOperatorMeta(operatorName); - 
Assert.assertNotNull(om); - Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class); - - long timeout = System.currentTimeMillis() + 30000; - while (System.currentTimeMillis() < timeout) { - if (EmbeddedCollector.RESULTS.containsAll(expected)) { - break; - } - LOG.info("Waiting for expected results."); - Thread.sleep(1000); - } - Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS); - } - - private static class EmbeddedCollector extends DoFn { - private static final long serialVersionUID = 1L; - private static final Set RESULTS = Collections.synchronizedSet(new HashSet<>()); - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - RESULTS.add(c.element()); - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/SideInputTranslationTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/SideInputTranslationTest.java deleted file mode 100644 index f579096b59f2..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/SideInputTranslationTest.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import com.datatorrent.api.DAG; -import com.datatorrent.api.DAG.OperatorMeta; -import com.datatorrent.stram.engine.PortContext; -import com.datatorrent.stram.plan.logical.LogicalPlan; -import java.io.Serializable; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.atomic.AtomicReference; -import org.apache.beam.runners.apex.ApexPipelineOptions; -import org.apache.beam.runners.apex.TestApexRunner; -import org.apache.beam.runners.apex.translation.utils.ApexStreamTuple.ApexStreamTupleCoder; -import org.apache.beam.runners.apex.translation.utils.CoderAdapterStreamCodec; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Test that view overrides are applied by checking the corresponding side input coders. 
Unlike - * runner validation these don't run the pipeline, they only check translation. - */ -@RunWith(JUnit4.class) -public class SideInputTranslationTest implements Serializable { - private static final AtomicReference SIDE_INPUT_ACCESSED = new AtomicReference<>(); - - @Test - public void testMapAsEntrySetSideInput() { - SIDE_INPUT_ACCESSED.set(false); - - ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class); - options.setApplicationName("SideInputTranslationTest"); - options.setRunner(TestApexRunner.class); - Pipeline pipeline = Pipeline.create(options); - - final PCollectionView> view = - pipeline - .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3))) - .apply(View.asMap()); - - PCollection> output = - pipeline - .apply("CreateMainInput", Create.of(2 /* size */)) - .apply( - "OutputSideInputs", - ParDo.of( - new DoFn>() { - @ProcessElement - public void processElement(ProcessContext c) { - assertEquals((int) c.element(), c.sideInput(view).size()); - assertEquals((int) c.element(), c.sideInput(view).entrySet().size()); - for (Entry entry : c.sideInput(view).entrySet()) { - c.output(KV.of(entry.getKey(), entry.getValue())); - } - // Using this to ensure that execution really reaches this point, - // as a workaround for https://issues.apache.org/jira/browse/BEAM-3261. - // When that issue is resolved, this test should simply be deleted, - // as it duplicates a test in ViewTest. 
- SIDE_INPUT_ACCESSED.set(true); - } - }) - .withSideInputs(view)); - - PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("b", 3)); - - pipeline.run(); - - assertTrue(SIDE_INPUT_ACCESSED.get()); - } - - @Test - public void testListSideInputTranslation() throws Exception { - assertEquals( - ListCoder.of(KvCoder.of(VoidCoder.of(), VarIntCoder.of())), - getTranslatedSideInputCoder(ImmutableList.of(11, 13, 17, 23), View.asList())); - } - - @Test - public void testMapSideInputTranslation() throws Exception { - assertEquals( - ListCoder.of( - KvCoder.of(VoidCoder.of(), KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))), - getTranslatedSideInputCoder(ImmutableList.of(KV.of("a", 1), KV.of("b", 3)), View.asMap())); - } - - @Test - public void testMultimapSideInputTranslation() throws Exception { - assertEquals( - ListCoder.of( - KvCoder.of(VoidCoder.of(), KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))), - getTranslatedSideInputCoder( - ImmutableList.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)), View.asMultimap())); - } - - private Coder getTranslatedSideInputCoder( - List items, PTransform, ? 
extends PCollectionView> viewTransform) - throws Exception { - Pipeline p = Pipeline.create(); - PCollectionView view = p.apply(Create.of(items)).apply(viewTransform); - p.apply(Create.of(1)) - .apply( - "ParDo", - ParDo.of( - new DoFn>() { - @ProcessElement - public void processElement(ProcessContext c) {} - }) - .withSideInputs(view)); - DAG dag = - TestApexRunner.translate(p, PipelineOptionsFactory.create().as(ApexPipelineOptions.class)); - - OperatorMeta om = dag.getOperatorMeta("ParDo/ParMultiDo(Anonymous)"); - assertNotNull(om); - assertEquals(2, om.getInputStreams().size()); - - DAG.InputPortMeta sideInput = null; - for (DAG.InputPortMeta input : om.getInputStreams().keySet()) { - if ("sideInput1".equals(((LogicalPlan.InputPortMeta) input).getPortName())) { - sideInput = input; - } - } - assertNotNull("could not find stream for: sideInput1", sideInput); - - CoderAdapterStreamCodec sc = - (CoderAdapterStreamCodec) sideInput.getAttributes().get(PortContext.STREAM_CODEC); - @SuppressWarnings("rawtypes") - ApexStreamTupleCoder coder = (ApexStreamTupleCoder) sc.getCoder(); - @SuppressWarnings("rawtypes") - FullWindowedValueCoder fwvc = (FullWindowedValueCoder) coder.getValueCoder(); - return fwvc.getValueCoder(); - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java deleted file mode 100644 index f4e8d10d36bc..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/operators/ApexTimerInternalsTest.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.operators; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - -import com.datatorrent.lib.util.KryoCloneUtils; -import com.datatorrent.netlet.util.Slice; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import org.apache.beam.runners.apex.translation.operators.ApexTimerInternals.TimerProcessor; -import org.apache.beam.runners.core.StateNamespaces; -import org.apache.beam.runners.core.TimerInternals.TimerData; -import org.apache.beam.runners.core.TimerInternals.TimerDataCoderV2; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.state.TimeDomain; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets; -import org.joda.time.Instant; -import org.junit.Test; - -/** Tests for {@link ApexTimerInternals}. 
*/ -public class ApexTimerInternalsTest { - - @Test - public void testEventTimeTimers() { - - final Map> firedTimers = new HashMap<>(); - TimerProcessor timerProcessor = firedTimers::put; - - TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE); - String key1 = "key1"; - Instant instant0 = new Instant(0); - Instant instant1 = new Instant(1); - Instant instant2 = new Instant(2); - - ApexTimerInternals timerInternals = new ApexTimerInternals<>(timerDataCoder); - timerInternals.setContext(key1, StringUtf8Coder.of(), Instant.now(), null); - - TimerData timerData0 = - TimerData.of( - "timerData0", StateNamespaces.global(), instant0, instant0, TimeDomain.EVENT_TIME); - timerInternals.setTimer(timerData0); - - TimerData timerData1 = - TimerData.of( - "timerData1", StateNamespaces.global(), instant1, instant1, TimeDomain.EVENT_TIME); - timerInternals.setTimer(timerData1); - - timerInternals.fireReadyTimers(instant0.getMillis(), timerProcessor, TimeDomain.EVENT_TIME); - assertEquals(0, firedTimers.size()); - firedTimers.clear(); - - timerInternals.fireReadyTimers( - instant1.getMillis(), timerProcessor, TimeDomain.PROCESSING_TIME); - assertEquals(0, firedTimers.size()); - timerInternals.fireReadyTimers(instant1.getMillis(), timerProcessor, TimeDomain.EVENT_TIME); - assertEquals(1, firedTimers.size()); - assertEquals( - Sets.newHashSet(timerData0), Sets.newHashSet(firedTimers.values().iterator().next())); - firedTimers.clear(); - - timerInternals.fireReadyTimers(instant2.getMillis(), timerProcessor, TimeDomain.EVENT_TIME); - assertEquals(1, firedTimers.size()); - assertEquals( - Sets.newHashSet(timerData1), Sets.newHashSet(firedTimers.values().iterator().next())); - firedTimers.clear(); - } - - @Test - public void testDeleteTimer() { - TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE); - String key1 = "key1"; - Instant instant0 = new Instant(0); - Instant instant1 = new Instant(1); - - ApexTimerInternals 
timerInternals = new ApexTimerInternals<>(timerDataCoder); - timerInternals.setContext(key1, StringUtf8Coder.of(), Instant.now(), null); - - TimerData timerData0 = - TimerData.of( - "timerData0", StateNamespaces.global(), instant0, instant0, TimeDomain.EVENT_TIME); - timerInternals.setTimer(timerData0); - - TimerData timerData1 = - TimerData.of( - "timerData1", StateNamespaces.global(), instant1, instant1, TimeDomain.EVENT_TIME); - timerInternals.setTimer(timerData1); - - Map> timerMap = timerInternals.getTimerSet(TimeDomain.EVENT_TIME).getMap(); - assertEquals(1, timerMap.size()); - assertEquals(2, timerMap.values().iterator().next().size()); - - timerInternals.deleteTimer(timerData0.getNamespace(), timerData0.getTimerId(), ""); - assertEquals(1, timerMap.size()); - assertEquals(1, timerMap.values().iterator().next().size()); - - timerInternals.deleteTimer( - timerData1.getNamespace(), timerData1.getTimerId(), TimeDomain.PROCESSING_TIME); - assertEquals(1, timerMap.size()); - assertEquals(1, timerMap.values().iterator().next().size()); - - timerInternals.deleteTimer( - timerData1.getNamespace(), timerData1.getTimerId(), TimeDomain.EVENT_TIME); - assertEquals(0, timerMap.size()); - } - - @Test - public void testSerialization() { - TimerDataCoderV2 timerDataCoder = TimerDataCoderV2.of(GlobalWindow.Coder.INSTANCE); - TimerData timerData = - TimerData.of( - "arbitrary-id", - StateNamespaces.global(), - new Instant(0), - new Instant(0), - TimeDomain.EVENT_TIME); - String key = "key"; - ApexTimerInternals timerInternals = new ApexTimerInternals<>(timerDataCoder); - timerInternals.setContext(key, StringUtf8Coder.of(), Instant.now(), null); - timerInternals.setTimer(timerData); - ApexTimerInternals cloned; - assertNotNull("Serialization", cloned = KryoCloneUtils.cloneObject(timerInternals)); - cloned.setContext(key, StringUtf8Coder.of(), Instant.now(), null); - Map> timers = cloned.getTimerSet(TimeDomain.EVENT_TIME).getMap(); - assertEquals(1, timers.size()); - } -} diff 
--git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternalsTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternalsTest.java deleted file mode 100644 index 8e6b6195f3fb..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/ApexStateInternalsTest.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.apex.translation.utils; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertThat; - -import com.datatorrent.lib.util.KryoCloneUtils; -import org.apache.beam.runners.core.StateInternals; -import org.apache.beam.runners.core.StateInternalsTest; -import org.apache.beam.runners.core.StateNamespace; -import org.apache.beam.runners.core.StateNamespaceForTest; -import org.apache.beam.runners.core.StateTag; -import org.apache.beam.runners.core.StateTags; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.state.ValueState; -import org.hamcrest.Matchers; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Tests for {@link ApexStateInternals}. This is based on the tests for {@code StateInternalsTest}. - */ -public class ApexStateInternalsTest { - - private static StateInternals newStateInternals() { - return new ApexStateInternals.ApexStateBackend() - .newStateInternalsFactory(StringUtf8Coder.of()) - .stateInternalsForKey("dummyKey"); - } - - /** A standard StateInternals test. Ignore set and map tests. */ - @RunWith(JUnit4.class) - public static class StandardStateInternalsTests extends StateInternalsTest { - @Override - protected StateInternals createStateInternals() { - return newStateInternals(); - } - - @Override - @Ignore - public void testSet() {} - - @Override - @Ignore - public void testSetIsEmpty() {} - - @Override - @Ignore - public void testMergeSetIntoSource() {} - - @Override - @Ignore - public void testMergeSetIntoNewNamespace() {} - - @Override - @Ignore - public void testMap() {} - - @Override - @Ignore - public void testSetReadable() {} - - @Override - @Ignore - public void testMapReadable() {} - } - - /** A specific test of ApexStateInternalsTest. 
*/ - @RunWith(JUnit4.class) - public static class OtherTests { - - private static final StateNamespace NAMESPACE = new StateNamespaceForTest("ns"); - private static final StateTag> STRING_VALUE_ADDR = - StateTags.value("stringValue", StringUtf8Coder.of()); - - @Test - public void testSerialization() throws Exception { - ApexStateInternals.ApexStateInternalsFactory sif = - new ApexStateInternals.ApexStateBackend().newStateInternalsFactory(StringUtf8Coder.of()); - ApexStateInternals keyAndState = sif.stateInternalsForKey("dummy"); - - ValueState value = keyAndState.state(NAMESPACE, STRING_VALUE_ADDR); - assertEquals(keyAndState.state(NAMESPACE, STRING_VALUE_ADDR), value); - value.write("hello"); - - ApexStateInternals.ApexStateInternalsFactory cloned; - assertNotNull("Serialization", cloned = KryoCloneUtils.cloneObject(sif)); - ApexStateInternals clonedKeyAndState = cloned.stateInternalsForKey("dummy"); - - ValueState clonedValue = clonedKeyAndState.state(NAMESPACE, STRING_VALUE_ADDR); - assertThat(clonedValue.read(), Matchers.equalTo("hello")); - assertEquals(clonedKeyAndState.state(NAMESPACE, STRING_VALUE_ADDR), value); - } - } -} diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/CollectionSource.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/CollectionSource.java deleted file mode 100644 index 166dc2cdde77..000000000000 --- a/runners/apex/src/test/java/org/apache/beam/runners/apex/translation/utils/CollectionSource.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.apex.translation.utils; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import javax.annotation.Nullable; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.joda.time.Instant; - -/** collection as {@link UnboundedSource}, used for tests. */ -public class CollectionSource extends UnboundedSource { - private static final long serialVersionUID = 1L; - private final Collection collection; - private final Coder coder; - - public CollectionSource(Collection collection, Coder coder) { - this.collection = collection; - this.coder = coder; - } - - @Override - public List> split( - int desiredNumSplits, PipelineOptions options) throws Exception { - return Collections.singletonList(this); - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable UnboundedSource.CheckpointMark checkpointMark) { - return new CollectionReader<>(collection, this); - } - - @Nullable - @Override - public Coder getCheckpointMarkCoder() { - return null; - } - - @Override - public Coder getOutputCoder() { - return coder; - } - - private static class CollectionReader extends UnboundedSource.UnboundedReader - implements Serializable { - - private T current; - private final CollectionSource source; - private final Collection collection; - private 
Iterator iterator; - - public CollectionReader(Collection collection, CollectionSource source) { - this.collection = collection; - this.source = source; - } - - @Override - public boolean start() throws IOException { - if (null == iterator) { - iterator = collection.iterator(); - } - return advance(); - } - - @Override - public boolean advance() throws IOException { - if (iterator.hasNext()) { - current = iterator.next(); - return true; - } else { - return false; - } - } - - @Override - public Instant getWatermark() { - return Instant.now(); - } - - @Override - public UnboundedSource.CheckpointMark getCheckpointMark() { - return null; - } - - @Override - public UnboundedSource getCurrentSource() { - return source; - } - - @Override - public T getCurrent() throws NoSuchElementException { - return current; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - return Instant.now(); - } - - @Override - public void close() throws IOException {} - } -} diff --git a/runners/apex/src/test/resources/beam-runners-apex.properties b/runners/apex/src/test/resources/beam-runners-apex.properties deleted file mode 100644 index 55cb397390c7..000000000000 --- a/runners/apex/src/test/resources/beam-runners-apex.properties +++ /dev/null @@ -1,20 +0,0 @@ -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -# properties for unit test -apex.operator.testProperties.attr.MEMORY_MB=32 diff --git a/runners/apex/src/test/resources/log4j.properties b/runners/apex/src/test/resources/log4j.properties deleted file mode 100644 index d1e6b44b03d9..000000000000 --- a/runners/apex/src/test/resources/log4j.properties +++ /dev/null @@ -1,35 +0,0 @@ -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -# Set root logger level to OFF to not flood build logs -# set manually to INFO for debugging purposes -log4j.rootLogger=OFF, testlogger - -# A1 is set to be a ConsoleAppender. 
-log4j.appender.testlogger=org.apache.log4j.ConsoleAppender -log4j.appender.testlogger.target = System.err -log4j.appender.testlogger.layout=org.apache.log4j.PatternLayout -log4j.appender.testlogger.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n -log4j.appender.testlogger.threshold=${test.log.threshold} -test.log.threshold=DEBUG - -log4j.logger.org=info -log4j.logger.org.apache.commons.beanutils=warn -log4j.logger.com.datatorrent=info -log4j.logger.org.apache.apex=info -log4j.logger.org.apache.beam.runners.apex=debug diff --git a/runners/apex/src/test/resources/words.txt b/runners/apex/src/test/resources/words.txt deleted file mode 100644 index 94151eea619f..000000000000 --- a/runners/apex/src/test/resources/words.txt +++ /dev/null @@ -1,3 +0,0 @@ - -foo foo foo bar bar -foo foo bar bar bar diff --git a/runners/jet/src/main/java/org/apache/beam/runners/jet/JetRunnerRegistrar.java b/runners/jet/src/main/java/org/apache/beam/runners/jet/JetRunnerRegistrar.java index 702693039a5a..203d1cb6dc8a 100644 --- a/runners/jet/src/main/java/org/apache/beam/runners/jet/JetRunnerRegistrar.java +++ b/runners/jet/src/main/java/org/apache/beam/runners/jet/JetRunnerRegistrar.java @@ -28,7 +28,7 @@ * Contains the {@link PipelineRunnerRegistrar} and {@link PipelineOptionsRegistrar} for the {@link * JetRunner}. * - *

{@link AutoService} will register Apex's implementations of the {@link PipelineRunner} and + *

{@link AutoService} will register Jet's implementations of the {@link PipelineRunner} and * {@link PipelineOptions} as available pipeline runner services. */ public final class JetRunnerRegistrar { diff --git a/runners/jet/src/test/java/org/apache/beam/runners/jet/JetTestRunnerRegistrar.java b/runners/jet/src/test/java/org/apache/beam/runners/jet/JetTestRunnerRegistrar.java index 875edb74e327..831b5673f070 100644 --- a/runners/jet/src/test/java/org/apache/beam/runners/jet/JetTestRunnerRegistrar.java +++ b/runners/jet/src/test/java/org/apache/beam/runners/jet/JetTestRunnerRegistrar.java @@ -28,7 +28,7 @@ * Contains the {@link PipelineRunnerRegistrar} and {@link PipelineOptionsRegistrar} for the {@link * TestJetRunner}. * - *

{@link AutoService} will register Apex's implementations of the {@link PipelineRunner} and + *

{@link AutoService} will register Jet's implementations of the {@link PipelineRunner} and * {@link PipelineOptions} as available pipeline runner services. */ public final class JetTestRunnerRegistrar { diff --git a/sdks/java/javadoc/build.gradle b/sdks/java/javadoc/build.gradle index d3d4a97e2b8a..fbdc03dfe73d 100644 --- a/sdks/java/javadoc/build.gradle +++ b/sdks/java/javadoc/build.gradle @@ -65,7 +65,6 @@ task aggregateJavadoc(type: Javadoc) { exclude "org/apache/beam/examples/*" exclude "org/apache/beam/fn/harness/*" - exclude "org/apache/beam/runners/apex/translation/*" exclude "org/apache/beam/runners/core/*" exclude "org/apache/beam/runners/dataflow/internal/*" exclude "org/apache/beam/runners/flink/examples/*" diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index de2e6c0660c1..a9d331d63ef0 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -185,52 +185,6 @@ - - apex-runner - - - - org.apache.beam - beam-runners-apex - ${beam.version} - runtime - - - - org.apache.httpcomponents - httpclient - 4.3.6 - runtime - - - commons-codec - commons-codec - - - - - - org.apache.hadoop - hadoop-yarn-client - ${hadoop.version} - runtime - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - runtime - - - - dataflow-runner diff --git a/sdks/java/testing/nexmark/src/main/resources/log4j.properties b/sdks/java/testing/nexmark/src/main/resources/log4j.properties index 14f8acde98cb..d44bc387eae1 100644 --- a/sdks/java/testing/nexmark/src/main/resources/log4j.properties +++ b/sdks/java/testing/nexmark/src/main/resources/log4j.properties @@ -42,14 +42,6 @@ log4j.logger.io.netty=INFO # Settings to quiet flink logs log4j.logger.org.apache.flink=WARN -# Settings to quiet apex logs 
-log4j.logger.org.apache.beam.runners.apex=INFO -log4j.logger.com.datatorrent=ERROR -log4j.logger.org.apache.hadoop.metrics2=WARN -log4j.logger.org.apache.commons=WARN -log4j.logger.org.apache.hadoop.security=WARN -log4j.logger.org.apache.hadoop.util=WARN - # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR diff --git a/settings.gradle b/settings.gradle index eb7a440798fc..a726dd9a5099 100644 --- a/settings.gradle +++ b/settings.gradle @@ -25,7 +25,6 @@ include ":examples:kotlin" include ":model:fn-execution" include ":model:job-management" include ":model:pipeline" -include ":runners:apex" include ":runners:core-construction-java" include ":runners:core-java" include ":runners:direct-java" diff --git a/website/www/site/content/en/contribute/_index.md b/website/www/site/content/en/contribute/_index.md index 528eb52b8641..417565bba1c3 100644 --- a/website/www/site/content/en/contribute/_index.md +++ b/website/www/site/content/en/contribute/_index.md @@ -38,7 +38,7 @@ There are lots of opportunities to contribute: - review [changes](https://github.com/apache/beam/pulls) - write new examples - improve your favorite language SDK (Java, Python, Go, etc) - - improve specific runners (Apache Apex, Apache Flink, Apache Spark, Google + - improve specific runners (Apache Flink, Apache Spark, Google Cloud Dataflow, etc) - improve or add IO connectors - add new transform libraries (statistics, ML, image processing, etc) diff --git a/website/www/site/content/en/contribute/ptransform-style-guide.md b/website/www/site/content/en/contribute/ptransform-style-guide.md index e8b2d7582a72..606ebae488e5 100644 --- a/website/www/site/content/en/contribute/ptransform-style-guide.md +++ b/website/www/site/content/en/contribute/ptransform-style-guide.md @@ -128,7 +128,7 @@ For a user, a 
transform that logged an error but succeeded is silent data loss. ### Performance -Many runners optimize chains of `ParDo`s in ways that improve performance if the `ParDo`s emit a small to moderate number of elements per input element, or have relatively cheap per-element processing (e.g. Dataflow's "fusion", Apex "compute locality"), but limit parallelization if these assumptions are violated. In that case you may need a "fusion break" (`Reshuffle.of()`) to improve the parallelizability of processing the output `PCollection` of the `ParDo`. +Many runners optimize chains of `ParDo`s in ways that improve performance if the `ParDo`s emit a small to moderate number of elements per input element, or have relatively cheap per-element processing (e.g. Dataflow's "fusion"), but limit parallelization if these assumptions are violated. In that case you may need a "fusion break" (`Reshuffle.of()`) to improve the parallelizability of processing the output `PCollection` of the `ParDo`. * If the transform includes a `ParDo` that outputs a potentially large number of elements per input element, apply a fusion break after this `ParDo` to make sure downstream transforms can process its output in parallel. * If the transform includes a `ParDo` that takes a very long time to process an element, insert a fusion break before this `ParDo` to make sure all or most elements can be processed in parallel regardless of how its input `PCollection` was produced. diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 7e53d93614f6..6e2f5512c86b 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -834,7 +834,7 @@ Since there are a bunch of tests, we recommend you running validations using aut ``` * Tasks included - 1. Run Java quickstart with Direct Runner, Apex local runner, Flink local runner, Spark local runner and Dataflow runner. 
+ 1. Run Java quickstart with Direct Runner, Flink local runner, Spark local runner and Dataflow runner. 1. Run Java Mobile Games(UserScore, HourlyTeamScore, Leaderboard) with Dataflow runner. 1. Create a PR to trigger python validation job, including * Python quickstart in batch and streaming mode with direct runner and Dataflow runner. @@ -864,12 +864,6 @@ _Note_: -Prepourl and -Pver can be found in the RC vote email sent by Release Ma -Prepourl=https://repository.apache.org/content/repositories/orgapachebeam-${KEY} \ -Pver=${RELEASE_VERSION} ``` - Apex Local Runner - ``` - ./gradlew :runners:apex:runQuickstartJavaApex \ - -Prepourl=https://repository.apache.org/content/repositories/orgapachebeam-${KEY} \ - -Pver=${RELEASE_VERSION} - ``` Flink Local Runner ``` ./gradlew :runners:flink:1.10:runQuickstartJavaFlinkLocal \ diff --git a/website/www/site/content/en/documentation/_index.md b/website/www/site/content/en/documentation/_index.md index 7e63bcfad176..cc28c9cba7fd 100644 --- a/website/www/site/content/en/documentation/_index.md +++ b/website/www/site/content/en/documentation/_index.md @@ -51,7 +51,6 @@ A Beam Runner runs a Beam pipeline on a specific (often distributed) data proces ### Available Runners * [DirectRunner](/documentation/runners/direct/): Runs locally on your machine -- great for developing, testing, and debugging. -* [ApexRunner](/documentation/runners/apex/): Runs on [Apache Apex](https://apex.apache.org). * [FlinkRunner](/documentation/runners/flink/): Runs on [Apache Flink](https://flink.apache.org). * [SparkRunner](/documentation/runners/spark/): Runs on [Apache Spark](https://spark.apache.org). * [DataflowRunner](/documentation/runners/dataflow/): Runs on [Google Cloud Dataflow](https://cloud.google.com/dataflow), a fully managed service within [Google Cloud Platform](https://cloud.google.com/). 
diff --git a/website/www/site/content/en/documentation/runners/apex.md b/website/www/site/content/en/documentation/runners/apex.md deleted file mode 100644 index 14f532a05067..000000000000 --- a/website/www/site/content/en/documentation/runners/apex.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -type: runners -title: "Apache Apex Runner" ---- - -# Using the Apache Apex Runner - -The Apex Runner executes Apache Beam pipelines using [Apache Apex](https://apex.apache.org/) as an underlying engine. The runner has broad support for the [Beam model and supports streaming and batch pipelines](/documentation/runners/capability-matrix/). - -[Apache Apex](https://apex.apache.org/) is a stream processing platform and framework for low-latency, high-throughput and fault-tolerant analytics applications on Apache Hadoop. Apex has a unified streaming architecture and can be used for real-time and batch processing. - -The following instructions are for running Beam pipelines with Apex on a YARN cluster. -They are not required for Apex in embedded mode (see [quickstart](/get-started/quickstart-java/)). - -## Apex Runner prerequisites - -You may set up your own Hadoop cluster. Beam does not require anything extra to launch the pipelines on YARN. -An optional Apex installation may be useful for monitoring and troubleshooting. -The Apex CLI can be [built](https://apex.apache.org/docs/apex/apex_development_setup/) or -obtained as binary build. -For more download options see [distribution information on the Apache Apex website](https://apex.apache.org/downloads.html). - -## Running wordcount with Apex - -Typically the build environment is separate from the target YARN cluster. In such case, it is necessary to build a fat jar that will include all dependencies. 
Ensure that `hadoop.version` in `pom.xml` matches the version of your YARN cluster and then build the jar file: -``` -mvn package -Papex-runner -``` - -Copy the resulting `target/word-count-beam-bundled-0.1.jar` to the cluster and submit the application using: -``` -java -cp word-count-beam-bundled-0.1.jar org.apache.beam.examples.WordCount --inputFile=/etc/profile --output=/tmp/counts --embeddedExecution=false --configFile=beam-runners-apex.properties --runner=ApexRunner -``` - -If the build environment is setup as cluster client, it is possible to run the example directly: -``` -mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount -Dexec.args="--inputFile=/etc/profile --output=/tmp/counts --runner=ApexRunner --embeddedExecution=false --configFile=beam-runners-apex.properties" -Papex-runner -``` - -The application will run asynchronously. Check status with `yarn application -list -appStates ALL` - -The configuration file is optional, it can be used to influence how Apex operators are deployed into YARN containers. -The following example will reduce the number of required containers by collocating the operators into the same container -and lower the heap memory per operator - suitable for execution in a single node Hadoop sandbox. - -``` -apex.application.*.operator.*.attr.MEMORY_MB=64 -apex.stream.*.prop.locality=CONTAINER_LOCAL -apex.application.*.operator.*.attr.TIMEOUT_WINDOW_COUNT=1200 -``` - -This example uses local files. To use a distributed file system (HDFS, S3 etc.), -it is necessary to augment the build to include the respective file system provider. - -## Montoring progress of your job - -Depending on your installation, you may be able to monitor the progress of your job on the Hadoop cluster. Alternatively, you have following options: - -* YARN : Using YARN web UI generally running on 8088 on the node running resource manager. 
-* Apex command-line interface: [Using the Apex CLI to get running application information](https://apex.apache.org/docs/apex/apex_cli/#apex-cli-commands). - -Check the output of the pipeline: -``` -ls /tmp/counts* -``` diff --git a/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md b/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md index 68595e2078a3..d73ceb4dc966 100644 --- a/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md +++ b/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md @@ -193,11 +193,6 @@ Available suites are: * STRESS: Like smoke but for 1m events. * FULL_THROTTLE: Like SMOKE but 100m events. - -### Apex runner specific configuration - - --manageResources=false --monitorJobs=false - ### Google Cloud Dataflow runner specific configuration --manageResources=false --monitorJobs=true \ @@ -261,105 +256,90 @@ These tables contain statuses of the queries runs in the different runners. Goog Direct Spark Flink - Apex 0 ok ok ok - ok 1 ok ok ok - ok 2 ok ok ok - ok 3 ok ok ok - BEAM-1114 4 ok ok ok - ok 5 ok ok ok - ok 6 ok ok ok - ok 7 ok ok ok - ok 8 ok ok ok - ok 9 ok ok ok - ok 10 ok ok ok - ok 11 ok ok ok - ok 12 ok ok ok - ok BOUNDED_SIDE_INPUT_JOIN ok ok ok - ok @@ -371,105 +351,90 @@ These tables contain statuses of the queries runs in the different runners. 
Goog Direct Spark BEAM-2847 Flink - Apex 0 ok ok ok - ok 1 ok ok ok - ok 2 ok ok ok - ok 3 ok BEAM-2176, BEAM-3961 ok - BEAM-1114 4 ok ok ok - ok 5 ok ok ok - ok 6 ok ok ok - ok 7 ok BEAM-2112 ok - ok 8 ok ok ok - ok 9 ok ok ok - ok 10 ok ok ok - ok 11 ok ok ok - ok 12 ok ok ok - ok BOUNDED_SIDE_INPUT_JOIN ok BEAM-2112 ok - ok @@ -578,32 +543,6 @@ Streaming Mode: --monitorJobs=true --flinkMaster=[local]" -### Running SMOKE suite on the ApexRunner (local) - -Batch Mode: - - ./gradlew :sdks:java:testing:nexmark:run \ - -Pnexmark.runner=":runners:apex" \ - -Pnexmark.args=" - --runner=ApexRunner - --suite=SMOKE - --streamTimeout=60 - --streaming=false - --manageResources=false - --monitorJobs=true" - -Streaming Mode: - - ./gradlew :sdks:java:testing:nexmark:run \ - -Pnexmark.runner=":runners:apex" \ - -Pnexmark.args=" - --runner=ApexRunner - --suite=SMOKE - --streamTimeout=60 - --streaming=true - --manageResources=false - --monitorJobs=true" - ### Running SMOKE suite on Google Cloud Dataflow Set these up first so the below command is valid diff --git a/website/www/site/content/en/documentation/sdks/python-streaming.md b/website/www/site/content/en/documentation/sdks/python-streaming.md index 7fed10d8d0b6..9c4a68800ff0 100644 --- a/website/www/site/content/en/documentation/sdks/python-streaming.md +++ b/website/www/site/content/en/documentation/sdks/python-streaming.md @@ -126,10 +126,6 @@ python -m apache_beam.examples.streaming_wordcount \ --streaming {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} See https://beam.apache.org/roadmap/portability/#python-on-flink for more information. 
{{< /highlight >}} diff --git a/website/www/site/content/en/get-started/beam-overview.md b/website/www/site/content/en/get-started/beam-overview.md index d72b91d35692..ba24f316b947 100644 --- a/website/www/site/content/en/get-started/beam-overview.md +++ b/website/www/site/content/en/get-started/beam-overview.md @@ -21,7 +21,7 @@ limitations under the License. # Apache Beam Overview -Apache Beam is an open source, unified model for defining both batch and streaming data-parallel processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the pipeline. The pipeline is then executed by one of Beam's supported **distributed processing back-ends**, which include [Apache Apex](https://apex.apache.org), [Apache Flink](https://flink.apache.org), [Apache Spark](http://spark.apache.org), and [Google Cloud Dataflow](https://cloud.google.com/dataflow). +Apache Beam is an open source, unified model for defining both batch and streaming data-parallel processing pipelines. Using one of the open source Beam SDKs, you build a program that defines the pipeline. The pipeline is then executed by one of Beam's supported **distributed processing back-ends**, which include [Apache Flink](https://flink.apache.org), [Apache Spark](http://spark.apache.org), and [Google Cloud Dataflow](https://cloud.google.com/dataflow). Beam is particularly useful for [Embarrassingly Parallel](https://en.wikipedia.org/wiki/Embarassingly_parallel) data processing tasks, in which the problem can be decomposed into many smaller bundles of data that can be processed independently and in parallel. You can also use Beam for Extract, Transform, and Load (ETL) tasks and pure data integration. These tasks are useful for moving data between different storage media and data sources, transforming data into a more desirable format, or loading data onto a new system. 
@@ -43,7 +43,6 @@ The Beam Pipeline Runners translate the data processing pipeline you define with Beam currently supports Runners that work with the following distributed processing back-ends: -- Apache Apex ![Apache Apex logo](/images/logos/runners/apex.png) - Apache Flink ![Apache Flink logo](/images/logos/runners/flink.png) - Apache Samza Apache Samza logo - Apache Spark ![Apache Spark logo](/images/logos/runners/spark.png) diff --git a/website/www/site/content/en/get-started/quickstart-java.md b/website/www/site/content/en/get-started/quickstart-java.md index 0844fef978c8..42e4431fc391 100644 --- a/website/www/site/content/en/get-started/quickstart-java.md +++ b/website/www/site/content/en/get-started/quickstart-java.md @@ -142,7 +142,7 @@ $ gradle build ## Run WordCount -A single Beam pipeline can run on multiple Beam [runners](/documentation#runners), including the [ApexRunner](/documentation/runners/apex), [FlinkRunner](/documentation/runners/flink), [SparkRunner](/documentation/runners/spark), [NemoRunner](/documentation/runners/nemo), [JetRunner](/documentation/runners/jet), or [DataflowRunner](/documentation/runners/dataflow). The [DirectRunner](/documentation/runners/direct) is a common runner for getting started, as it runs locally on your machine and requires no specific setup. +A single Beam pipeline can run on multiple Beam [runners](/documentation#runners), including the [FlinkRunner](/documentation/runners/flink), [SparkRunner](/documentation/runners/spark), [NemoRunner](/documentation/runners/nemo), [JetRunner](/documentation/runners/jet), or [DataflowRunner](/documentation/runners/dataflow). The [DirectRunner](/documentation/runners/direct) is a common runner for getting started, as it runs locally on your machine and requires no specific setup. 
After you've chosen which runner you'd like to use: @@ -162,12 +162,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ -Dexec.args="--inputFile=pom.xml --output=counts" -Pdirect-runner {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ - -Dexec.args="--inputFile=pom.xml --output=counts --runner=ApexRunner" -Papex-runner -{{< /highlight >}} - - {{< highlight class="runner-flink-local" >}} $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ -Dexec.args="--runner=FlinkRunner --inputFile=pom.xml --output=counts" -Pflink-runner @@ -220,11 +214,6 @@ PS> mvn compile exec:java -D exec.mainClass=org.apache.beam.examples.WordCount ` -D exec.args="--inputFile=pom.xml --output=counts" -P direct-runner {{< /highlight >}} -{{< highlight class="runner-apex" >}} -PS> mvn compile exec:java -D exec.mainClass=org.apache.beam.examples.WordCount ` - -D exec.args="--inputFile=pom.xml --output=counts --runner=ApexRunner" -P apex-runner -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} PS> mvn compile exec:java -D exec.mainClass=org.apache.beam.examples.WordCount ` -D exec.args="--runner=FlinkRunner --inputFile=pom.xml --output=counts" -P flink-runner @@ -323,10 +312,6 @@ Once the pipeline has completed, you can view the output. You'll notice that the $ ls counts* {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ ls counts* -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} $ ls counts* {{< /highlight >}} @@ -369,16 +354,6 @@ Foundation: 1 ... {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ cat counts* -BEAM: 1 -have: 1 -simple: 1 -skip: 4 -PAssert: 1 -... 
-{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} $ more counts* The: 1 diff --git a/website/www/site/content/en/get-started/quickstart-py.md b/website/www/site/content/en/get-started/quickstart-py.md index a74b811d8658..1c0afda35911 100644 --- a/website/www/site/content/en/get-started/quickstart-py.md +++ b/website/www/site/content/en/get-started/quickstart-py.md @@ -157,10 +157,6 @@ For example, run `wordcount.py` with the following command: python -m apache_beam.examples.wordcount --input /path/to/inputfile --output /path/to/write/counts {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} Currently, running wordcount.py on Flink requires a full download of the Beam source code. See https://beam.apache.org/roadmap/portability/#python-on-flink for more information. diff --git a/website/www/site/content/en/get-started/wordcount-example.md b/website/www/site/content/en/get-started/wordcount-example.md index 08e447912fc6..cd08f5a4cef9 100644 --- a/website/www/site/content/en/get-started/wordcount-example.md +++ b/website/www/site/content/en/get-started/wordcount-example.md @@ -336,11 +336,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ -Dexec.args="--inputFile=pom.xml --output=counts" -Pdirect-runner {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ - -Dexec.args="--inputFile=pom.xml --output=counts --runner=ApexRunner" -Papex-runner -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \ -Dexec.args="--runner=FlinkRunner --inputFile=pom.xml --output=counts" -Pflink-runner @@ -391,10 +386,6 @@ To view the full code in Java, see python -m apache_beam.examples.wordcount --input YOUR_INPUT_FILE --output counts {{< 
/highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} Currently, running wordcount.py on Flink requires a full download of the Beam source code. See https://beam.apache.org/roadmap/portability/#python-on-flink for more information. @@ -443,10 +434,6 @@ $ go install github.com/apache/beam/sdks/go/examples/wordcount $ wordcount --input --output counts {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Go SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Go SDK. {{< /highlight >}} @@ -679,11 +666,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.DebuggingWordC -Dexec.args="--output=counts" -Pdirect-runner {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.DebuggingWordCount \ - -Dexec.args="--output=counts --runner=ApexRunner" -Papex-runner -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.DebuggingWordCount \ -Dexec.args="--runner=FlinkRunner --output=counts" -Pflink-runner @@ -734,10 +716,6 @@ To view the full code in Java, see python -m apache_beam.examples.wordcount_debugging --input YOUR_INPUT_FILE --output counts {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Python SDK. {{< /highlight >}} @@ -782,10 +760,6 @@ $ go install github.com/apache/beam/sdks/go/examples/debugging_wordcount $ debugging_wordcount --input --output counts {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Go SDK. 
-{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Go SDK. {{< /highlight >}} @@ -928,11 +902,6 @@ or DEBUG significantly increases the amount of logs output. > **Note:** This section is yet to be added. There is an open issue for this > ([BEAM-791](https://issues.apache.org/jira/browse/BEAM-791)). -#### Apache Apex Runner - -> **Note:** This section is yet to be added. There is an open issue for this -> ([BEAM-2285](https://issues.apache.org/jira/browse/BEAM-2285)). - #### Apache Nemo Runner When executing your pipeline with the `NemoRunner`, most log messages are printed @@ -1021,11 +990,6 @@ $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WindowedWordCo -Dexec.args="--inputFile=pom.xml --output=counts" -Pdirect-runner {{< /highlight >}} -{{< highlight class="runner-apex" >}} -$ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WindowedWordCount \ - -Dexec.args="--inputFile=pom.xml --output=counts --runner=ApexRunner" -Papex-runner -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WindowedWordCount \ -Dexec.args="--runner=FlinkRunner --inputFile=pom.xml --output=counts" -Pflink-runner @@ -1080,10 +1044,6 @@ parameter. using the format `PROJECT:DATASET.TABLE` or python -m apache_beam.examples.windowed_wordcount --input YOUR_INPUT_FILE --output_table PROJECT:DATASET.TABLE {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Python SDK. {{< /highlight >}} @@ -1128,10 +1088,6 @@ $ go install github.com/apache/beam/sdks/go/examples/windowed_wordcount $ windowed_wordcount --input --output counts {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Go SDK. 
-{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Go SDK. {{< /highlight >}} @@ -1388,10 +1344,6 @@ python -m apache_beam.examples.streaming_wordcount \ --streaming {{< /highlight >}} -{{< highlight class="runner-apex" >}} -This runner is not yet available for the Python SDK. -{{< /highlight >}} - {{< highlight class="runner-flink-local" >}} This runner is not yet available for the Python SDK. {{< /highlight >}} diff --git a/website/www/site/content/en/roadmap/apex-runner.md b/website/www/site/content/en/roadmap/apex-runner.md deleted file mode 100644 index d6e3fe9eef3f..000000000000 --- a/website/www/site/content/en/roadmap/apex-runner.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: "Samza Runner Roadmap" ---- - - -# Apache Apex Runner Roadmap - -This roadmap is in progress. In the meantime, here are available resources: - - - [Runner documentation](/documentation/runners/apex) - - JIRA: [runner-apex](https://issues.apache.org/jira/issues/?jql=project%20%3D%20BEAM%20AND%20component%20%3D%20runner-apex) diff --git a/website/www/site/data/capability_matrix.yaml b/website/www/site/data/capability_matrix.yaml index ad34e568cc32..c041328023b6 100644 --- a/website/www/site/data/capability_matrix.yaml +++ b/website/www/site/data/capability_matrix.yaml @@ -22,8 +22,6 @@ capability-matrix: name: Apache Spark (RDD/DStream based) - class: spark-dataset name: Apache Spark Structured Streaming (Dataset based) - - class: apex - name: Apache Apex - class: mapreduce name: Apache Hadoop MapReduce - class: jstorm @@ -67,10 +65,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: ParDo applies per-element transformations as Spark FlatMapFunction. - - class: apex - l1: "Yes" - l2: fully supported - l3: Supported through Apex operator that wraps the function and processes data as single element bundles. 
- class: mapreduce l1: "Yes" l2: fully supported @@ -117,10 +111,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's groupByKey." - - class: apex - l1: "Yes" - l2: fully supported - l3: "Apex runner uses the Beam code for grouping by window and thereby has support for all windowing and triggering mechanisms. Runner does not implement partitioning yet (BEAM-838)" - class: mapreduce l1: "Yes" l2: fully supported @@ -167,10 +157,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: Some corner cases like flatten on empty collections are not yet supported. - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "Yes" l2: fully supported @@ -217,10 +203,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's Aggregator and agg function" - - class: apex - l1: "Yes" - l2: "fully supported" - l3: "Default Beam translation. Currently no efficient pre-aggregation (BEAM-935)." - class: mapreduce l1: "Yes" l2: fully supported @@ -267,10 +249,6 @@ capability-matrix: l1: "Partially" l2: supported via inlining only in batch mode l3: "" - - class: apex - l1: "Partially" - l2: supported via inlining - l3: "" - class: mapreduce l1: "Yes" l2: fully supported @@ -317,10 +295,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "Using Spark's broadcast variables." - - class: apex - l1: "Yes" - l2: size restrictions - l3: No distributed implementation and therefore size restrictions. - class: mapreduce l1: "Yes" l2: fully supported @@ -367,10 +341,6 @@ capability-matrix: l1: "Partially" l2: bounded source only l3: "Using Spark's DatasourceV2 API in microbatch mode (Continuous streaming mode is tagged experimental in spark and does not support aggregation)." 
- - class: apex - l1: "Yes" - l2: fully supported - l3: - class: mapreduce l1: "Partially" l2: bounded source only @@ -417,10 +387,6 @@ capability-matrix: l1: "No" l2: not implemented l3: - - class: apex - l1: "Partially" - l2: supports bounded-per-element SDFs - l3: implementation in streaming mode coming soon - class: mapreduce l1: "No" l2: not implemented @@ -467,10 +433,6 @@ capability-matrix: l1: "Partially" l2: All metric types are supported in batch mode. l3: Only attempted values are supported. No committed values for metrics. - - class: apex - l1: "No" - l2: Not implemented in runner. - l3: - class: mapreduce l1: "Partially" l2: Only attempted counters are supported @@ -517,10 +479,6 @@ capability-matrix: l1: "No" l2: not implemented l3: - - class: apex - l1: "Partially" - l2: non-merging windows - l3: State is supported for non-merging windows. SetState and MapState are not yet supported. - class: mapreduce l1: "Partially" l2: non-merging windows @@ -574,10 +532,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -624,10 +578,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -674,10 +624,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -724,10 +670,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -774,10 +716,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -824,10 +762,6 @@ capability-matrix: l1: "Partially" l2: 
fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -874,10 +808,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: supported - l3: "" - class: mapreduce l1: "Yes" l2: supported @@ -932,10 +862,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: batch-only runner @@ -983,10 +909,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1034,10 +956,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1085,10 +1003,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1137,10 +1051,6 @@ capability-matrix: l1: "No" l2: pending model support l3: - - class: apex - l1: "No" - l2: pending model support - l3: - class: mapreduce l1: "No" l2: "" @@ -1188,10 +1098,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1239,10 +1145,6 @@ capability-matrix: l1: "No" l2: no streaming support in the runner l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1290,10 +1192,6 @@ capability-matrix: l1: "No" l2: not implemented l3: "" - - class: apex - l1: "No" - l2: not implemented - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1348,10 +1246,6 @@ capability-matrix: l1: "Partially" l2: fully supported in batch mode l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "" - 
class: mapreduce l1: "No" l2: batch-only runner @@ -1399,10 +1293,6 @@ capability-matrix: l1: "No" l2: "" l3: "" - - class: apex - l1: "Yes" - l2: fully supported - l3: "Size restriction, see combine support." - class: mapreduce l1: "No" l2: "" @@ -1451,10 +1341,6 @@ capability-matrix: l1: "No" l2: pending model support l3: "" - - class: apex - l1: "No" - l2: pending model support - l3: "" - class: mapreduce l1: "No" l2: "" @@ -1508,10 +1394,6 @@ capability-matrix: l1: l2: l3: - - class: apex - l1: - l2: - l3: - class: mapreduce l1: l2: @@ -1554,10 +1436,6 @@ capability-matrix: l1: "No" l2: l3: not implemented - - class: apex - l1: - l2: - l3: - class: mapreduce l1: l2: diff --git a/website/www/site/data/works_with.yaml b/website/www/site/data/works_with.yaml index adf5f8fbc30a..ce2b5352a04c 100644 --- a/website/www/site/data/works_with.yaml +++ b/website/www/site/data/works_with.yaml @@ -9,10 +9,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -- title: APEX - image_url: /images/logo_apex.png - url: https://apex.apache.org + - title: Flink image_url: /images/logo_flink.png url: https://flink.apache.org diff --git a/website/www/site/layouts/partials/section-menu/en/roadmap.html b/website/www/site/layouts/partials/section-menu/en/roadmap.html index 84f5804570f3..0a37f28760b3 100644 --- a/website/www/site/layouts/partials/section-menu/en/roadmap.html +++ b/website/www/site/layouts/partials/section-menu/en/roadmap.html @@ -28,7 +28,6 @@ Runners