[improve] Remove the spark version of spark task (apache#11860)
rickchengx authored Sep 21, 2022
1 parent cf522e2 commit 08a4c79
Showing 42 changed files with 151 additions and 478 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/cluster-test/mysql/dolphinscheduler_env.sh
@@ -37,11 +37,10 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-zo
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
-export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
+export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
5 changes: 2 additions & 3 deletions .github/workflows/cluster-test/…/dolphinscheduler_env.sh
@@ -37,11 +37,10 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-zo
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
-export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
+export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
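
With the two variables collapsed into one, pointing DolphinScheduler at whichever Spark release is installed comes down to the single `SPARK_HOME` default above. A minimal sketch, assuming a Spark 3.1.1 tarball unpacked under `/opt/soft` (the release and paths are illustrative, not part of this commit):

```bash
# Expose the installed release at the single SPARK_HOME default used by dolphinscheduler_env.sh
ln -s /opt/soft/spark-3.1.1-bin-hadoop2.7 /opt/soft/spark   # assumed install location
# The env script then resolves SPARK_HOME=/opt/soft/spark and puts $SPARK_HOME/bin on PATH
/opt/soft/spark/bin/spark-submit --version
```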
3 changes: 1 addition & 2 deletions deploy/kubernetes/dolphinscheduler/values.yaml
@@ -160,8 +160,7 @@ common:
# dolphinscheduler env
HADOOP_HOME: "/opt/soft/hadoop"
HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop"
-SPARK_HOME1: "/opt/soft/spark1"
-SPARK_HOME2: "/opt/soft/spark2"
+SPARK_HOME: "/opt/soft/spark"
PYTHON_HOME: "/usr/bin/python"
JAVA_HOME: "/usr/local/openjdk-8"
HIVE_HOME: "/opt/soft/hive"
5 changes: 2 additions & 3 deletions docs/docs/en/architecture/configuration.md
@@ -347,14 +347,13 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
-export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
+export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
```

### Log related configuration
32 changes: 15 additions & 17 deletions docs/docs/en/architecture/task-structure.md
@@ -256,21 +256,20 @@ No.|parameter name||type|description |note
| 13 | | executorMemory | String | executor memory |
| 14 | | executorCores | String | executor cores |
| 15 | | programType | String | program type | JAVA,SCALA,PYTHON |
-| 16 | | sparkVersion | String | Spark version | SPARK1 , SPARK2 |
-| 17 | | localParams | Array | customized local parameters |
-| 18 | | resourceList | Array | resource files |
-| 19 | description | | String | description | |
-| 20 | runFlag | | String | execution flag | |
-| 21 | conditionResult | | Object | condition branch | |
-| 22 | | successNode | Array | jump to node if success | |
-| 23 | | failedNode | Array | jump to node if failure |
-| 24 | dependence | | Object | task dependency | mutual exclusion with params |
-| 25 | maxRetryTimes | | String | max retry times | |
-| 26 | retryInterval | | String | retry interval | |
-| 27 | timeout | | Object | timeout | |
-| 28 | taskInstancePriority | | String | task priority | |
-| 29 | workerGroup | | String | Worker group | |
-| 30 | preTasks | | Array | preposition tasks | |
+| 16 | | localParams | Array | customized local parameters |
+| 17 | | resourceList | Array | resource files |
+| 18 | description | | String | description | |
+| 19 | runFlag | | String | execution flag | |
+| 20 | conditionResult | | Object | condition branch | |
+| 21 | | successNode | Array | jump to node if success | |
+| 22 | | failedNode | Array | jump to node if failure |
+| 23 | dependence | | Object | task dependency | mutual exclusion with params |
+| 24 | maxRetryTimes | | String | max retry times | |
+| 25 | retryInterval | | String | retry interval | |
+| 26 | timeout | | Object | timeout | |
+| 27 | taskInstancePriority | | String | task priority | |
+| 28 | workerGroup | | String | Worker group | |
+| 29 | preTasks | | Array | preposition tasks | |

**Node data example:**

Expand Down Expand Up @@ -302,8 +301,7 @@ No.|parameter name||type|description |note
"executorCores":2,
"mainArgs":"10",
"others":"",
"programType":"SCALA",
"sparkVersion":"SPARK2"
"programType":"SCALA"
},
"description":"",
"runFlag":"NORMAL",
2 changes: 1 addition & 1 deletion docs/docs/en/faq.md
@@ -229,7 +229,7 @@ export PYTHON_HOME=/bin/python
Note: This is **PYTHON_HOME** , which is the absolute path of the python command, not the simple PYTHON_HOME. Also note that when exporting the PATH, you need to directly

```
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
```

​ 2,For versions prior to 1.0.3, the Python task only supports the Python version of the system. It does not support specifying the Python version.
5 changes: 2 additions & 3 deletions docs/docs/en/guide/expansion-reduction.md
@@ -79,14 +79,13 @@ Attention:
```shell
export HADOOP_HOME=/opt/soft/hadoop
export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
-# export SPARK_HOME1=/opt/soft/spark1
-export SPARK_HOME2=/opt/soft/spark2
+export SPARK_HOME=/opt/soft/spark
export PYTHON_HOME=/opt/soft/python
export JAVA_HOME=/opt/soft/jav
export HIVE_HOME=/opt/soft/hive
export FLINK_HOME=/opt/soft/flink
export DATAX_HOME=/opt/soft/datax/bin/datax.py
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH

```

51 changes: 4 additions & 47 deletions docs/docs/en/guide/installation/kubernetes.md
@@ -360,7 +360,7 @@ kubectl cp -n test spark-2.4.7-bin-hadoop2.7.tgz dolphinscheduler-worker-0:/opt/

Because the volume `sharedStoragePersistence` is mounted on `/opt/soft`, all files in `/opt/soft` will not be lost.

-5. Attach the container and ensure that `SPARK_HOME2` exists.
+5. Attach the container and ensure that `SPARK_HOME` exists.

```bash
kubectl exec -it dolphinscheduler-worker-0 bash
@@ -369,15 +369,15 @@ cd /opt/soft
tar zxf spark-2.4.7-bin-hadoop2.7.tgz
rm -f spark-2.4.7-bin-hadoop2.7.tgz
ln -s spark-2.4.7-bin-hadoop2.7 spark2 # or just mv
-$SPARK_HOME2/bin/spark-submit --version
+$SPARK_HOME/bin/spark-submit --version
```

The last command will print the Spark version if everything goes well.

6. Verify Spark under a Shell task.

```
-$SPARK_HOME2/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME2/examples/jars/spark-examples_2.11-2.4.7.jar
+$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.7.jar
```

Check whether the task log contains the output like `Pi is roughly 3.146015`.
@@ -386,7 +386,6 @@

The file `spark-examples_2.11-2.4.7.jar` needs to be uploaded to the resources first, and then create a Spark task with:

-- Spark Version: `SPARK2`
- Main Class: `org.apache.spark.examples.SparkPi`
- Main Package: `spark-examples_2.11-2.4.7.jar`
- Deploy Mode: `local`
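
With those settings, the task amounts to a local `spark-submit` of the uploaded jar. A rough sketch of the equivalent command (the actual command line is assembled by the Spark task plugin, so treat the flags below as an approximation):

```bash
$SPARK_HOME/bin/spark-submit \
  --master local \
  --class org.apache.spark.examples.SparkPi \
  spark-examples_2.11-2.4.7.jar
```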
@@ -399,47 +398,6 @@ Spark on YARN (Deploy Mode is `cluster` or `client`) requires Hadoop support. Si

Ensure that `$HADOOP_HOME` and `$HADOOP_CONF_DIR` exists.

-### How to Support Spark 3?
-
-In fact, the way to submit applications with `spark-submit` is the same, regardless of Spark 1, 2 or 3. In other words, the semantics of `SPARK_HOME2` is the second `SPARK_HOME` instead of `SPARK2`'s `HOME`, so just set `SPARK_HOME2=/path/to/spark3`.
-
-Take Spark 3.1.1 as an example:
-
-1. Download the Spark 3.1.1 release binary `spark-3.1.1-bin-hadoop2.7.tgz`.
-
-2. Ensure that `common.sharedStoragePersistence.enabled` is turned on.
-
-3. Run a DolphinScheduler release in Kubernetes (See **Install DolphinScheduler**).
-
-4. Copy the Spark 3.1.1 release binary into the Docker container.
-
-```bash
-kubectl cp spark-3.1.1-bin-hadoop2.7.tgz dolphinscheduler-worker-0:/opt/soft
-kubectl cp -n test spark-3.1.1-bin-hadoop2.7.tgz dolphinscheduler-worker-0:/opt/soft # with test namespace
-```
-
-5. Attach the container and ensure that `SPARK_HOME2` exists.
-
-```bash
-kubectl exec -it dolphinscheduler-worker-0 bash
-kubectl exec -n test -it dolphinscheduler-worker-0 bash # with test namespace
-cd /opt/soft
-tar zxf spark-3.1.1-bin-hadoop2.7.tgz
-rm -f spark-3.1.1-bin-hadoop2.7.tgz
-ln -s spark-3.1.1-bin-hadoop2.7 spark2 # or just mv
-$SPARK_HOME2/bin/spark-submit --version
-```
-
-The last command will print the Spark version if everything goes well.
-
-6. Verify Spark under a Shell task.
-
-```
-$SPARK_HOME2/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME2/examples/jars/spark-examples_2.12-3.1.1.jar
-```
-
-Check whether the task log contains the output like `Pi is roughly 3.146015`.

### How to Support Shared Storage Between Master, Worker and Api Server?

For example, Master, Worker and API server may use Hadoop at the same time.
@@ -579,8 +537,7 @@ common:
| `common.configmap.SW_GRPC_LOG_SERVER_PORT` | Set grpc log server port for skywalking | `11800` |
| `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` |
| `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` |
-| `common.configmap.SPARK_HOME1` | Set `SPARK_HOME1` for DolphinScheduler's task environment | `/opt/soft/spark1` |
-| `common.configmap.SPARK_HOME2` | Set `SPARK_HOME2` for DolphinScheduler's task environment | `/opt/soft/spark2` |
+| `common.configmap.SPARK_HOME` | Set `SPARK_HOME` for DolphinScheduler's task environment | `/opt/soft/spark` |
| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` |
| `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/usr/local/openjdk-8` |
| `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` |
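
On Kubernetes the same consolidation shows up as a single chart value, so overriding the Spark location no longer involves two keys. A minimal sketch of setting it at install time (release name, chart path, and the override value are assumptions for illustration):

```bash
helm install dolphinscheduler ./deploy/kubernetes/dolphinscheduler \
  --set common.configmap.SPARK_HOME=/opt/soft/spark   # or any path where a Spark release is unpacked
```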
5 changes: 2 additions & 3 deletions docs/docs/en/guide/installation/pseudo-cluster.md
@@ -131,14 +131,13 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-lo
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
-export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
+export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
```

## Initialize the Database
1 change: 0 additions & 1 deletion docs/docs/en/guide/task/spark.md
@@ -20,7 +20,6 @@ Spark task type for executing Spark application. When executing the Spark task,
| **Parameter** | **Description** |
|----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
| Program type | Supports Java, Scala, Python, and SQL. |
-| Spark version | Support Spark1 and Spark2. |
| The class of main function | The **full path** of Main Class, the entry point of the Spark program. |
| Main jar package | The Spark jar package (upload by Resource Center). |
| SQL scripts | SQL statements in .sql files that Spark sql runs. |
2 changes: 2 additions & 0 deletions docs/docs/en/guide/upgrade/incompatible.md
@@ -4,6 +4,8 @@ This document records the incompatible updates between each version. You need to

## dev

+* Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)).
+
## 3.0.0

* Copy and import workflow without 'copy' suffix [#10607](https://github.com/apache/dolphinscheduler/pull/10607)
5 changes: 2 additions & 3 deletions docs/docs/zh/architecture/configuration.md
@@ -339,14 +339,13 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
-export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
-export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
+export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

-export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
```

## Log related configuration