Skip to content

Commit

Permalink
Consolidate and simplify usage of get_available_packages (apache#35556)
Browse files Browse the repository at this point in the history
With recent changes, where we largely switched to using the short
form of package names rather than `apache-airflow-providers-*` and
filters, there were several places where the get_available_packages
family of methods was used inconsistently in various breeze
commands.

This change consolidates it all into a common way. It also replaces
the inconsistent `providers-index` with the regular
"apache-airflow-providers" - which is now unambiguous, since
package filters are not used much any more.
  • Loading branch information
potiuk authored Nov 9, 2023
1 parent f791900 commit da86802
Show file tree
Hide file tree
Showing 30 changed files with 312 additions and 319 deletions.
3 changes: 2 additions & 1 deletion BREEZE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,8 @@ Now with the remaining part, replace every ``dash("-")`` with a ``dot(".")``.
Example:
If the provider name is ``apache-airflow-providers-cncf-kubernetes``, it will be ``cncf.kubernetes``.

Note: For building docs for apache-airflow-providers index, use ``providers-index`` as the short hand operator.
Note: For building docs for apache-airflow-providers index, use ``apache-airflow-providers``
as the short hand operator.

Running static checks
---------------------
Expand Down
4 changes: 2 additions & 2 deletions dev/README_RELEASE_PROVIDER_PACKAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ If we want to just release some providers you can release them using package nam

```shell script
cd "${AIRFLOW_REPO_ROOT}"
breeze build-docs providers-index cncf.kubernetes sftp --clean-build
breeze build-docs apache-airflow-providers cncf.kubernetes sftp --clean-build
```


Expand Down Expand Up @@ -428,7 +428,7 @@ way faster on multi-cpu machines when you are publishing multiple providers:
```shell script
cd "${AIRFLOW_REPO_ROOT}"

breeze release-management publish-docs providers-index --package-filter 'apache-airflow-providers-*' \
breeze release-management publish-docs apache-airflow-providers --package-filter 'apache-airflow-providers-*' \
--override-versioned --run-in-parallel

breeze release-management add-back-references all-providers
Expand Down
10 changes: 5 additions & 5 deletions dev/breeze/src/airflow_breeze/commands/developer_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from airflow_breeze.utils.cache import read_from_cache_file
from airflow_breeze.utils.coertions import one_or_none_set
from airflow_breeze.utils.common_options import (
argument_short_doc_packages_with_providers_index,
argument_doc_packages,
option_airflow_constraints_reference,
option_airflow_extras,
option_answer,
Expand Down Expand Up @@ -88,7 +88,7 @@
get_extra_docker_flags,
perform_environment_checks,
)
from airflow_breeze.utils.general_utils import expand_all_providers
from airflow_breeze.utils.packages import expand_all_provider_packages
from airflow_breeze.utils.path_utils import (
AIRFLOW_SOURCES_ROOT,
cleanup_python_generated_files,
Expand Down Expand Up @@ -388,7 +388,6 @@ def start_airflow(
@main.command(name="build-docs")
@click.option("-d", "--docs-only", help="Only build documentation.", is_flag=True)
@click.option("-s", "--spellcheck-only", help="Only run spell checking.", is_flag=True)
@argument_short_doc_packages_with_providers_index
@option_builder
@click.option(
"--package-filter",
Expand All @@ -409,11 +408,12 @@ def start_airflow(
help="Builds documentation in one pass only. This is useful for debugging sphinx errors.",
is_flag=True,
)
@argument_doc_packages
@option_github_repository
@option_verbose
@option_dry_run
def build_docs(
short_doc_packages: tuple[str, ...],
doc_packages: tuple[str, ...],
docs_only: bool,
spellcheck_only: bool,
builder: str,
Expand Down Expand Up @@ -444,7 +444,7 @@ def build_docs(
spellcheck_only=spellcheck_only,
one_pass_only=one_pass_only,
skip_environment_initialization=True,
short_doc_packages=expand_all_providers(short_doc_packages),
short_doc_packages=expand_all_provider_packages(doc_packages),
)
extra_docker_flags = get_extra_docker_flags(MOUNT_SELECTED)
env = get_env_variables_for_docker_commands(params)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@
)
from airflow_breeze.utils.ci_group import ci_group
from airflow_breeze.utils.common_options import (
argument_packages,
argument_short_doc_packages,
argument_short_doc_packages_with_providers_index,
argument_doc_packages,
argument_provider_packages,
option_airflow_constraints_mode_ci,
option_airflow_constraints_mode_update,
option_airflow_constraints_reference,
Expand Down Expand Up @@ -88,8 +87,8 @@
get_extra_docker_flags,
perform_environment_checks,
)
from airflow_breeze.utils.general_utils import expand_all_providers
from airflow_breeze.utils.github import download_constraints_file, get_active_airflow_versions
from airflow_breeze.utils.packages import convert_to_long_package_names, expand_all_provider_packages
from airflow_breeze.utils.parallel import (
GenericRegexpProgressMatcher,
SummarizeAfter,
Expand All @@ -109,10 +108,6 @@
get_related_providers,
)
from airflow_breeze.utils.publish_docs_builder import PublishDocsBuilder
from airflow_breeze.utils.publish_docs_helpers import (
get_available_packages,
process_package_filters,
)
from airflow_breeze.utils.python_versions import get_python_version_list
from airflow_breeze.utils.run_utils import (
RunCommandResult,
Expand Down Expand Up @@ -229,7 +224,6 @@ def prepare_airflow_packages(
help="Prepare CHANGELOG, README and COMMITS information for providers.",
)
@option_debug_release_management
@argument_packages
@click.option(
"--base-branch",
type=str,
Expand All @@ -248,14 +242,15 @@ def prepare_airflow_packages(
help="Only regenerate missing documentation, do not bump version. Useful if templates were added"
" and you need to regenerate documentation.",
)
@argument_provider_packages
@option_verbose
@option_dry_run
@option_answer
def prepare_provider_documentation(
github_repository: str,
base_branch: str,
debug: bool,
packages: list[str],
provider_packages: list[str],
only_min_version_update: bool,
regenerate_missing_docs: bool,
):
Expand All @@ -272,7 +267,10 @@ def prepare_provider_documentation(
skip_environment_initialization=True,
)
rebuild_or_pull_ci_image_if_needed(command_params=shell_params)
cmd_to_run = ["/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh", *packages]
cmd_to_run = [
"/opt/airflow/scripts/in_container/run_prepare_provider_documentation.sh",
*provider_packages,
]
answer = get_forced_answer()
result_command = run_docker_command_with_debug(
params=shell_params,
Expand All @@ -295,7 +293,7 @@ def prepare_provider_documentation(
help="Read list of packages from text file (one package per line).",
)
@option_debug_release_management
@argument_packages
@argument_provider_packages
@option_github_repository
@option_verbose
@option_dry_run
Expand All @@ -304,12 +302,12 @@ def prepare_provider_packages(
version_suffix_for_pypi: str,
package_list_file: IO,
debug: bool,
packages: tuple[str, ...],
provider_packages: tuple[str, ...],
github_repository: str,
):
perform_environment_checks()
cleanup_python_generated_files()
packages_list = list(packages)
packages_list = list(provider_packages)

removed_provider_ids = get_removed_provider_ids()
if package_list_file:
Expand Down Expand Up @@ -707,8 +705,8 @@ def install_provider_packages(
@option_airflow_extras
@option_airflow_constraints_reference
@option_skip_constraints
@option_install_selected_providers
@option_use_packages_from_dist
@option_install_selected_providers
@option_installation_package_format
@option_debug_release_management
@option_github_repository
Expand Down Expand Up @@ -789,7 +787,7 @@ def run_docs_publishing(


def run_publish_docs_in_parallel(
package_list: list[str],
package_list: tuple[str, ...],
airflow_site_directory: str,
override_versioned: bool,
include_success_outputs: bool,
Expand Down Expand Up @@ -837,7 +835,6 @@ def run_publish_docs_in_parallel(
)
@click.option("-s", "--override-versioned", help="Overrides versioned directories.", is_flag=True)
@option_airflow_site_directory
@argument_short_doc_packages_with_providers_index
@click.option(
"--package-filter",
help="List of packages to consider. You can use the full names like apache-airflow-providers-<provider>, "
Expand All @@ -851,12 +848,13 @@ def run_publish_docs_in_parallel(
@option_debug_resources
@option_include_success_outputs
@option_skip_cleanup
@argument_doc_packages
@option_verbose
@option_dry_run
def publish_docs(
override_versioned: bool,
airflow_site_directory: str,
short_doc_packages: tuple[str, ...],
doc_packages: tuple[str, ...],
package_filter: tuple[str, ...],
run_in_parallel: bool,
parallelism: int,
Expand All @@ -871,10 +869,9 @@ def publish_docs(
"Provide the path of cloned airflow-site repo\n"
)

current_packages = process_package_filters(
get_available_packages(), package_filter, expand_all_providers(short_doc_packages)
current_packages = convert_to_long_package_names(
package_filters=package_filter, packages_short_form=expand_all_provider_packages(doc_packages)
)

print(f"Publishing docs for {len(current_packages)} package(s)")
for pkg in current_packages:
print(f" - {pkg}")
Expand All @@ -885,7 +882,7 @@ def publish_docs(
parallelism=parallelism,
skip_cleanup=skip_cleanup,
debug_resources=debug_resources,
include_success_outputs=True,
include_success_outputs=include_success_outputs,
airflow_site_directory=airflow_site_directory,
override_versioned=override_versioned,
)
Expand All @@ -901,12 +898,12 @@ def publish_docs(
help="Command to add back references for documentation to make it backward compatible.",
)
@option_airflow_site_directory
@argument_short_doc_packages
@argument_doc_packages
@option_verbose
@option_dry_run
def add_back_references(
airflow_site_directory: str,
short_doc_packages: tuple[str, ...],
doc_packages: tuple[str, ...],
):
"""Adds back references for documentation generated by build-docs and publish-docs"""
site_path = Path(airflow_site_directory)
Expand All @@ -916,12 +913,12 @@ def add_back_references(
"Provide the path of cloned airflow-site repo\n"
)
sys.exit(1)
if not short_doc_packages:
if not doc_packages:
get_console().print(
"\n[error]You need to specify at least one package to generate back references for\n"
)
sys.exit(1)
start_generating_back_references(site_path, list(expand_all_providers(short_doc_packages)))
start_generating_back_references(site_path, list(expand_all_provider_packages(doc_packages)))


@release_management.command(
Expand Down Expand Up @@ -1180,9 +1177,9 @@ def get_prs_for_package(package_id: str) -> list[int]:
)
@click.option("--excluded-pr-list", type=str, help="Coma-separated list of PRs to exclude from the issue.")
@click.option("--disable-progress", is_flag=True, help="Disable progress bar")
@argument_packages
@argument_provider_packages
def generate_issue_content_providers(
packages: list[str],
provider_packages: list[str],
github_token: str,
suffix: str,
only_available_in_dist: bool,
Expand All @@ -1199,8 +1196,8 @@ class ProviderPRInfo(NamedTuple):
version: str
pr_list: list[PullRequest.PullRequest | Issue.Issue]

if not packages:
packages = list(DEPENDENCIES.keys())
if not provider_packages:
provider_packages = list(DEPENDENCIES.keys())
with ci_group("Generates GitHub issue content with people who can test it"):
if excluded_pr_list:
excluded_prs = [int(pr) for pr in excluded_pr_list.split(",")]
Expand All @@ -1211,7 +1208,7 @@ class ProviderPRInfo(NamedTuple):
if only_available_in_dist:
files_in_dist = os.listdir(str(AIRFLOW_SOURCES_ROOT / "dist"))
prepared_package_ids = []
for package_id in packages:
for package_id in provider_packages:
if not only_available_in_dist or is_package_in_dist(files_in_dist, package_id):
get_console().print(f"Extracting PRs for provider {package_id}")
prepared_package_ids.append(package_id)
Expand Down
36 changes: 8 additions & 28 deletions dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from pathlib import Path

from airflow_breeze.utils.host_info_utils import Architecture
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, PROVIDER_DEPENDENCIES_JSON_FILE_PATH
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT

RUNS_ON_PUBLIC_RUNNER = '["ubuntu-22.04"]'
# we should get more sophisticated logic here in the future, but for now we just check if
Expand Down Expand Up @@ -106,15 +106,13 @@

PIP_VERSION = "23.3.1"

# key used for generating providers index
PROVIDERS_INDEX_KEY = "providers-index"
# keys for generated non providers docs
NON_PROVIDERS_DOC_KEYS = ["apache-airflow", "docker-stack", "helm-chart"]
# Mapping which store short-key:full-key
ALL_SPECIAL_DOC_KEYS = {
PROVIDERS_INDEX_KEY: "apache-airflow-providers",
**dict(zip(NON_PROVIDERS_DOC_KEYS, NON_PROVIDERS_DOC_KEYS)),
}
# regular doc packages (those that are not individual provider packages)
REGULAR_DOC_PACKAGES = [
"apache-airflow",
"docker-stack",
"helm-chart",
"apache-airflow-providers",
]


@lru_cache(maxsize=None)
Expand Down Expand Up @@ -184,24 +182,6 @@ def all_helm_test_packages() -> list[str]:
ALL_HISTORICAL_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]


def get_available_documentation_packages(short_version=False, only_providers: bool = False) -> list[str]:
    """Return the names of packages that documentation can be built for.

    Provider ids are read from the keys of the JSON document stored at
    ``PROVIDER_DEPENDENCIES_JSON_FILE_PATH``. Each id is turned into a long
    package name by replacing dots with dashes and prepending
    ``apache-airflow-providers-``.

    :param short_version: when true, provider packages are returned in short
        form (prefix stripped, dashes turned back into dots); names that are
        not provider packages are returned unchanged.
    :param only_providers: when true, the entries of ``NON_PROVIDERS_DOC_KEYS``
        are left out of the result.
    :return: non-provider doc keys first (unless ``only_providers`` is set),
        followed by the provider packages sorted by their long name.
    """
    provider_prefix = "apache-airflow-providers-"
    provider_ids = json.loads(PROVIDER_DEPENDENCIES_JSON_FILE_PATH.read_text())
    available_packages: list[str] = [] if only_providers else list(NON_PROVIDERS_DOC_KEYS)
    available_packages.extend(
        sorted(f"{provider_prefix}{provider_id.replace('.', '-')}" for provider_id in provider_ids)
    )
    if short_version:
        # Decide by prefix rather than by name length, so that a long
        # non-provider key is never truncated or has its dashes rewritten.
        available_packages = [
            package[len(provider_prefix) :].replace("-", ".")
            if package.startswith(provider_prefix)
            else package
            for package in available_packages
        ]
    return available_packages


def get_default_platform_machine() -> str:
machine = platform.uname().machine
# Some additional conversion for various platforms...
Expand Down
4 changes: 2 additions & 2 deletions dev/breeze/src/airflow_breeze/params/doc_build_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from dataclasses import dataclass

from airflow_breeze.branch_defaults import AIRFLOW_BRANCH
from airflow_breeze.utils.general_utils import get_docs_filter_name_from_short_hand
from airflow_breeze.utils.packages import get_long_package_names

providers_prefix = "apache-airflow-providers-"

Expand All @@ -47,7 +47,7 @@ def args_doc_builder(self) -> list[str]:
if AIRFLOW_BRANCH != "main":
doc_args.append("--disable-provider-checks")
if self.short_doc_packages:
for filter_from_short_doc in get_docs_filter_name_from_short_hand(self.short_doc_packages):
for filter_from_short_doc in get_long_package_names(self.short_doc_packages):
doc_args.extend(["--package-filter", filter_from_short_doc])
if self.package_filter:
for filter in self.package_filter:
Expand Down
3 changes: 3 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/add_back_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ def start_generating_back_references(airflow_site_directory: Path, short_provide
if "docker-stack" in short_provider_package_ids:
get_console().print("[info]Skipping docker-stack package. No back-reference needed.")
short_provider_package_ids.remove("docker-stack")
if "apache-airflow-providers" in short_provider_package_ids:
get_console().print("[info]Skipping apache-airflow-providers package. No back-reference needed.")
short_provider_package_ids.remove("apache-airflow-providers")
if short_provider_package_ids:
all_providers = [
f"apache-airflow-providers-{package.replace('.','-')}" for package in short_provider_package_ids
Expand Down
Loading

0 comments on commit da86802

Please sign in to comment.