From e3c7a5921db48400a8b19e6a8ce62cd24e473896 Mon Sep 17 00:00:00 2001
From: "acqua.csq"
Date: Tue, 15 Dec 2020 15:15:58 +0800
Subject: [PATCH] [FLINK-20601][docs][python] Rework PyFlink CLI documentation.

This closes #14367.
---
 docs/deployment/cli.md                    | 72 +++++++++++++++++++++++
 docs/deployment/cli.zh.md                 | 72 +++++++++++++++++++++++
 docs/dev/python/datastream_tutorial.md    |  2 +-
 docs/dev/python/datastream_tutorial.zh.md |  2 +-
 docs/dev/python/table_api_tutorial.md     |  2 +-
 docs/dev/python/table_api_tutorial.zh.md  |  2 +-
 6 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/docs/deployment/cli.md b/docs/deployment/cli.md
index adcbdd1be50d8..6984f124a09f2 100644
--- a/docs/deployment/cli.md
+++ b/docs/deployment/cli.md
@@ -338,4 +338,76 @@ specified in the `config/flink-config.yaml`.
 For more details on the commands and the available options, please refer to the Resource Provider-specific
 pages of the documentation.
 
+### Submitting PyFlink Jobs
+
+Currently, users are able to submit a PyFlink job via the CLI. Unlike Java job submission, it does not
+require specifying a JAR file path or an entry main class.
+
+<span class="label label-info">Note</span> When submitting a Python job via `flink run`, Flink will run the command "python". Please run the following command to confirm that the Python executable in the current environment points to a supported Python version of 3.5+:
+{% highlight bash %}
+$ python --version
+# the version printed here must be 3.5+
+{% endhighlight %}
+
+The following commands show different PyFlink job submission use-cases:
+
+- Run a PyFlink job:
+{% highlight bash %}
+$ ./bin/flink run --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink job with additional source and resource files. Files specified in `--pyFiles` will be
+added to the `PYTHONPATH` and are therefore available to the Python code:
+{% highlight bash %}
+$ ./bin/flink run \
+      --python examples/python/table/batch/word_count.py \
+      --pyFiles file:///user.txt,hdfs:///$namenode_address/username.txt
+{% endhighlight %}
+
+- Run a PyFlink job that references Java UDFs or external connectors. The JAR file specified via `--jarfile` will be uploaded
+to the cluster:
+{% highlight bash %}
+$ ./bin/flink run \
+      --python examples/python/table/batch/word_count.py \
+      --jarfile <jarFile>
+{% endhighlight %}
+
+- Run a PyFlink job with the main entry module specified via `--pyModule` and its search path via `--pyFiles`:
+{% highlight bash %}
+$ ./bin/flink run \
+      --pyModule batch.word_count \
+      --pyFiles examples/python/table/batch
+{% endhighlight %}
+
+- Submit a PyFlink job to a specific JobManager running on host `<jobmanagerHost>` (adapt the command accordingly):
+{% highlight bash %}
+$ ./bin/flink run \
+      --jobmanager <jobmanagerHost>:8081 \
+      --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink job using a [YARN cluster in Per-Job Mode]({% link deployment/resource-providers/yarn.md %}#per-job-cluster-mode):
+{% highlight bash %}
+$ ./bin/flink run \
+      --target yarn-per-job \
+      --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink application on a native Kubernetes cluster with the cluster ID `<ClusterId>`. This requires a Docker image with PyFlink installed; please refer to [Enabling PyFlink in docker]({% link deployment/resource-providers/standalone/docker.md %}#enabling-python):
+{% highlight bash %}
+$ ./bin/flink run-application \
+      --target kubernetes-application \
+      --parallelism 8 \
+      -Dkubernetes.cluster-id=<ClusterId> \
+      -Dtaskmanager.memory.process.size=4096m \
+      -Dkubernetes.taskmanager.cpu=2 \
+      -Dtaskmanager.numberOfTaskSlots=4 \
+      -Dkubernetes.container.image=<PyFlinkImageName> \
+      --pyModule word_count \
+      --pyFiles /opt/flink/examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+To learn more about the available options, please refer to [Kubernetes]({% link deployment/resource-providers/native_kubernetes.md %})
+or
[YARN]({% link deployment/resource-providers/yarn.md %}), which are described in more detail in the
+Resource Provider section of the documentation.
 
 {% top %}

diff --git a/docs/deployment/cli.zh.md b/docs/deployment/cli.zh.md
index 45dd16d52edba..a49e2ebd3a2c9 100644
--- a/docs/deployment/cli.zh.md
+++ b/docs/deployment/cli.zh.md
@@ -337,4 +337,76 @@ specified in the `config/flink-config.yaml`.
 For more details on the commands and the available options, please refer to the Resource Provider-specific
 pages of the documentation.
 
+### Submitting PyFlink Jobs
+
+Currently, users are able to submit a PyFlink job via the CLI. Unlike Java job submission, it does not
+require specifying a JAR file path or an entry main class.
+
+<span class="label label-info">Note</span> When submitting a Python job via `flink run`, Flink will run the command "python". Please run the following command to confirm that the Python executable in the current environment points to a supported Python version of 3.5+:
+{% highlight bash %}
+$ python --version
+# the version printed here must be 3.5+
+{% endhighlight %}
+
+The following commands show different PyFlink job submission use-cases:
+
+- Run a PyFlink job:
+{% highlight bash %}
+$ ./bin/flink run --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink job with additional source and resource files. Files specified in `--pyFiles` will be
+added to the `PYTHONPATH` and are therefore available to the Python code:
+{% highlight bash %}
+$ ./bin/flink run \
+      --python examples/python/table/batch/word_count.py \
+      --pyFiles file:///user.txt,hdfs:///$namenode_address/username.txt
+{% endhighlight %}
+
+- Run a PyFlink job that references Java UDFs or external connectors. The
JAR file specified via `--jarfile` will be uploaded
+to the cluster:
+{% highlight bash %}
+$ ./bin/flink run \
+      --python examples/python/table/batch/word_count.py \
+      --jarfile <jarFile>
+{% endhighlight %}
+
+- Run a PyFlink job with the main entry module specified via `--pyModule` and its search path via `--pyFiles`:
+{% highlight bash %}
+$ ./bin/flink run \
+      --pyModule batch.word_count \
+      --pyFiles examples/python/table/batch
+{% endhighlight %}
+
+- Submit a PyFlink job to a specific JobManager running on host `<jobmanagerHost>` (adapt the command accordingly):
+{% highlight bash %}
+$ ./bin/flink run \
+      --jobmanager <jobmanagerHost>:8081 \
+      --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink job using a [YARN cluster in Per-Job Mode]({% link deployment/resource-providers/yarn.zh.md %}#per-job-cluster-mode):
+{% highlight bash %}
+$ ./bin/flink run \
+      --target yarn-per-job \
+      --python examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+- Run a PyFlink application on a native Kubernetes cluster with the cluster ID `<ClusterId>`. This requires a Docker image with PyFlink installed; please refer to [Enabling PyFlink in docker]({% link deployment/resource-providers/standalone/docker.zh.md %}#enabling-python):
+{% highlight bash %}
+$ ./bin/flink run-application \
+      --target kubernetes-application \
+      --parallelism 8 \
+      -Dkubernetes.cluster-id=<ClusterId> \
+      -Dtaskmanager.memory.process.size=4096m \
+      -Dkubernetes.taskmanager.cpu=2 \
+      -Dtaskmanager.numberOfTaskSlots=4 \
+      -Dkubernetes.container.image=<PyFlinkImageName> \
+      --pyModule word_count \
+      --pyFiles /opt/flink/examples/python/table/batch/word_count.py
+{% endhighlight %}
+
+To learn more about the available options, please refer to [Kubernetes]({% link deployment/resource-providers/native_kubernetes.zh.md %})
+or [YARN]({% link deployment/resource-providers/yarn.zh.md %}), which are described in more detail in the
+Resource Provider section of the documentation.
 {% top %}

diff --git a/docs/dev/python/datastream_tutorial.md b/docs/dev/python/datastream_tutorial.md
index 944af44123f68..a2b86e414553c 100644
--- a/docs/dev/python/datastream_tutorial.md
+++ b/docs/dev/python/datastream_tutorial.md
@@ -130,7 +130,7 @@ Next, you can run the example you just created on the command line:
 $ python datastream_tutorial.py
 {% endhighlight %}
 
-The command builds and runs your PyFlink program in a local mini cluster. You can alternatively submit it to a remote cluster using the instructions detailed in [Job Submission Examples]({% link deployment/cli.md %}#job-submission-examples).
+The command builds and runs your PyFlink program in a local mini cluster. You can alternatively submit it to a remote cluster using the instructions detailed in [Job Submission Examples]({% link deployment/cli.md %}#submitting-pyflink-jobs).
 
 Finally, you can see the execution result on the command line:
 
diff --git a/docs/dev/python/datastream_tutorial.zh.md b/docs/dev/python/datastream_tutorial.zh.md
index e121ba0b57ac0..e491ef645c98a 100644
--- a/docs/dev/python/datastream_tutorial.zh.md
+++ b/docs/dev/python/datastream_tutorial.zh.md
@@ -131,7 +131,7 @@ rm -rf /tmp/output
 $ python datastream_tutorial.py
 {% endhighlight %}
 
-这个命令会在本地集群中构建并运行 PyFlink 程序。你也可以使用 [Job Submission Examples]({% link deployment/cli.zh.md %}#job-submission-examples) 中描述的命令将其提交到远程集群。
+这个命令会在本地集群中构建并运行 PyFlink 程序。你也可以使用 [Job Submission Examples]({% link deployment/cli.zh.md %}#submitting-pyflink-jobs) 中描述的命令将其提交到远程集群。
 
 最后,你可以在命令行上看到执行结果:
 
diff --git a/docs/dev/python/table_api_tutorial.md b/docs/dev/python/table_api_tutorial.md
index 9a7ab5e1e5245..e17fc4ecbf848 100644
--- a/docs/dev/python/table_api_tutorial.md
+++ b/docs/dev/python/table_api_tutorial.md
@@ -187,7 +187,7 @@ $ python WordCount.py
 The command builds and runs the Python Table API program in a local mini cluster.
 You can also submit the Python Table API program to a remote cluster, you can refer
-[Job Submission Examples]({% link deployment/cli.md %}#job-submission-examples)
+[Job Submission Examples]({% link deployment/cli.md %}#submitting-pyflink-jobs)
 for more details.
 
 Finally, you can see the execution result on the command line:
 
diff --git a/docs/dev/python/table_api_tutorial.zh.md b/docs/dev/python/table_api_tutorial.zh.md
index f210e08ee405d..8af0eae56264e 100644
--- a/docs/dev/python/table_api_tutorial.zh.md
+++ b/docs/dev/python/table_api_tutorial.zh.md
@@ -191,7 +191,7 @@ $ python WordCount.py
 {% endhighlight %}
 
 上述命令会构建Python Table API程序,并在本地mini cluster中运行。如果想将作业提交到远端集群执行,
-可以参考[作业提交示例]({% link deployment/cli.zh.md %}#job-submission-examples)。
+可以参考[作业提交示例]({% link deployment/cli.zh.md %}#submitting-pyflink-jobs)。
 
 最后,你可以通过如下命令查看你的运行结果:
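A note on the `--pyFiles` behavior documented in the patch above: files shipped this way land on the `PYTHONPATH` of the Python workers, which is why the job code can simply import them. The following Flink-free sketch illustrates that effect; the module name `user_utils` and its `greet` function are invented for illustration only, and the temporary directory stands in for a file distributed via `--pyFiles`.

```python
# Sketch (not Flink code): why a file passed via --pyFiles becomes importable.
# Flink places such files on the PYTHONPATH of the Python workers; here we
# mimic that by inserting a directory into sys.path before importing from it.
import importlib
import sys
import tempfile
from pathlib import Path

def add_to_pythonpath(paths):
    """Mimic --pyFiles handling: prepend each path to the module search path."""
    for p in paths:
        sys.path.insert(0, str(p))

# Stand-in for a dependency shipped alongside the job via --pyFiles.
shipped_dir = Path(tempfile.mkdtemp())
(shipped_dir / "user_utils.py").write_text(
    "def greet(name):\n    return 'hello ' + name\n"
)

add_to_pythonpath([shipped_dir])
user_utils = importlib.import_module("user_utils")
print(user_utils.greet("flink"))  # prints "hello flink"
```

In a real PyFlink job the `sys.path` manipulation is done by the framework, so `word_count.py` would contain only the plain `import user_utils`.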