Skip to content

Commit

Permalink
[ci] Add backup release task and helper functions to docker tag cleanup workflow (ray-project#43308)
Browse files Browse the repository at this point in the history

Follow up to ray-project#43245
- Add filter function _is_release_tag to check whether a tag is a release tag
- Add function to copy tag from Docker Hub to AWS ECR
- Add function to back up all release tags in a repo from Docker Hub to AWS ECR

Signed-off-by: khluu <[email protected]>
Signed-off-by: khluu <[email protected]>
  • Loading branch information
khluu authored Mar 1, 2024
1 parent 2c37909 commit 72de08c
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 1 deletion.
13 changes: 13 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,16 @@ register_execution_platforms(
"@local_config_platform//:host",
"//:hermetic_python_platform",
)

# Release tarball of go-containerregistry v0.19.0 for Linux x86_64.
# docker_tags_lib.py resolves the `crane` binary from this archive through
# runfiles ("crane_linux_x86_64/crane").
http_archive(
name = "crane_linux_x86_64",
# Generated BUILD file: a single public filegroup named "file" that globs
# every file in the extracted archive.
build_file_content = """
filegroup(
name = "file",
srcs = glob(["**"]),
visibility = ["//visibility:public"],
)
""",
# Checksum of the tarball at the URL below; presumably needs to be updated
# whenever the pinned version in the URL changes.
sha256 = "daa629648e1d1d10fc8bde5e6ce4176cbc0cd48a32211b28c3fd806e0fa5f29b",
urls = ["https://github.com/google/go-containerregistry/releases/download/v0.19.0/go-containerregistry_Linux_x86_64.tar.gz"]
)
4 changes: 4 additions & 0 deletions ci/ray_ci/automation/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,14 @@ py_binary(
# Python library target for docker_tags_lib.py (Docker tag helpers).
py_library(
name = "docker_tags_lib",
srcs = ["docker_tags_lib.py"],
# Files of the crane archive, attached as data so the library can locate
# the crane binary through runfiles.
data = [
"@crane_linux_x86_64//:file",
],
visibility = ["//ci/ray_ci/automation:__subpackages__"],
deps = [
ci_require("requests"),
"//ci/ray_ci:ray_ci_lib",
# Presumably provides the `runfiles` module imported by
# docker_tags_lib.py -- confirm against the requirements file.
ci_require("bazel-runfiles"),
],
)

Expand Down
131 changes: 130 additions & 1 deletion ci/ray_ci/automation/docker_tags_lib.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import subprocess
from datetime import datetime
from typing import List, Optional, Callable
from typing import List, Optional, Callable, Tuple
import os
from dateutil import parser
import runfiles
import platform

import requests

Expand Down Expand Up @@ -233,3 +235,130 @@ def query_tags_from_docker_with_oci(namespace: str, repository: str) -> List[str
raise Exception(f"Failed to query tags from Docker: {response.json()}")

return [f"{namespace}/{repository}:{t}" for t in response.json()["tags"]]


def _is_release_tag(
tag: str,
release_versions: Optional[List[str]] = None,
) -> bool:
"""
Check if tag is a release tag & is in the list of release versions.
Tag input format should be just the tag name, without namespace/repository.
Tag input can be in any format queried from Docker Hub: "x.y.z-...", "a1s2d3-..."
Args:
tag: Docker tag name
release_versions: List of release versions.
If None, don't filter by release version.
Returns:
True if tag is a release tag and is in the list of release versions.
False otherwise.
"""
versions = tag.split(".")
if len(versions) != 3 and "post1" not in tag:
return False
# Parse variables into major, minor, patch version
major, minor, patch = versions[0], versions[1], versions[2]
extra = versions[3] if len(versions) > 3 else None
if not major.isnumeric() or not minor.isnumeric():
return False
if not patch.isnumeric() and "rc" not in patch and "-" not in patch:
return False

if "-" in patch:
patch = patch.split("-")[0]
release_version = ".".join([major, minor, patch])
if extra:
release_version += f".{extra}"
if release_versions and release_version not in release_versions:
return False

return True


def _crane_binary():
    """
    Return the runfiles location of the bundled crane binary.

    Only Linux on x86_64 is supported, because the only crane archive looked
    up here is "crane_linux_x86_64".

    Returns:
        Path of the crane binary as resolved by runfiles.
    Raises:
        ValueError: if the host is not Linux on x86_64.
    """
    system = platform.system()
    machine = platform.machine()
    # platform.processor() may be an empty string when the value cannot be
    # determined, so platform.machine() is accepted as well.
    is_x86_64 = "x86_64" in (machine, platform.processor())
    if system != "Linux" or not is_x86_64:
        raise ValueError(f"Unsupported platform: {system} ({machine})")
    # Resolve runfiles only after the platform is known to be supported.
    r = runfiles.Create()
    return r.Rlocation("crane_linux_x86_64/crane")


def _call_crane_cp(tag: str, source: str, aws_ecr_repo: str) -> Tuple[int, str]:
    """
    Run `crane cp` to copy an image and capture what the command prints.

    Args:
        tag: Tag name to write in the destination repository.
        source: Image reference to copy from.
        aws_ecr_repo: Destination repository; the image is copied to
            "{aws_ecr_repo}:{tag}".
    Returns:
        Tuple of (return code, captured output). The output is returned for
        failed runs as well, so callers can report why the copy failed.
    """
    with subprocess.Popen(
        [
            _crane_binary(),
            "cp",
            source,
            f"{aws_ecr_repo}:{tag}",
        ],
        stdout=subprocess.PIPE,
        # Merge stderr into stdout so that anything the command writes to
        # stderr is part of the captured output too.
        stderr=subprocess.STDOUT,
        text=True,
    ) as proc:
        lines = []
        for line in proc.stdout:
            # Each line already ends with a newline; strip it so the log does
            # not contain blank lines.
            logger.info(line.rstrip("\n"))
            lines.append(line)
        return_code = proc.wait()
    return return_code, "".join(lines)


def copy_tag_to_aws_ecr(tag: str, aws_ecr_repo: str) -> bool:
    """
    Copy tag from Docker Hub to AWS ECR.

    Args:
        tag: Docker tag name in format "namespace/repository:tag"
        aws_ecr_repo: AWS ECR repository to copy to. The image is written to
            "{aws_ecr_repo}:{tag name}".
    Returns:
        True if tag was copied successfully, False otherwise.
    Raises:
        ValueError: if `tag` does not contain a tag name after the repository.
    """
    # Only the last path segment carries the tag name. Splitting from the
    # right also accepts references with extra leading path components.
    repo_tag = tag.rsplit("/", 1)[-1]
    _, separator, tag_name = repo_tag.partition(":")
    if not separator or not tag_name:
        raise ValueError(
            f"Invalid tag {tag}, expected format namespace/repository:tag"
        )

    logger.info(f"Copying from {tag} to {aws_ecr_repo}:{tag_name}......")
    return_code, output = _call_crane_cp(
        tag=tag_name,
        source=tag,
        aws_ecr_repo=aws_ecr_repo,
    )
    if return_code:
        logger.info(f"Failed to copy {tag} to {aws_ecr_repo}:{tag_name}......")
        logger.info(f"Error: {output}")
        return False
    logger.info(f"Copied {tag} to {aws_ecr_repo}:{tag_name} successfully")
    return True


def backup_release_tags(
    namespace: str,
    repository: str,
    aws_ecr_repo: str,
    docker_username: str,
    docker_password: str,
    release_versions: Optional[List[str]] = None,
) -> None:
    """
    Backup release tags to AWS ECR.

    Queries Docker Hub for the release tags of namespace/repository, writes
    the list to "release_tags.txt", then copies every tag to AWS ECR. A tag
    that fails to copy does not stop the remaining copies; the failed tags
    are logged together once all copies were attempted.

    Args:
        namespace: Docker Hub namespace to query
        repository: Docker Hub repository to query
        aws_ecr_repo: AWS ECR repository
        docker_username: Docker Hub username used to get the auth token
        docker_password: Docker Hub password used to get the auth token
        release_versions: List of release versions to backup.
            If None, all release tags are backed up.
    """
    docker_hub_token = _get_docker_hub_auth_token(docker_username, docker_password)
    docker_hub_tags = query_tags_from_docker_hub(
        filter_func=lambda t: _is_release_tag(t, release_versions),
        namespace=namespace,
        repository=repository,
        docker_hub_token=docker_hub_token,
    )
    _write_to_file("release_tags.txt", docker_hub_tags)

    failed_tags = []
    for t in docker_hub_tags:
        if not copy_tag_to_aws_ecr(tag=t, aws_ecr_repo=aws_ecr_repo):
            failed_tags.append(t)

    # Keep the backup best-effort, but report every tag that was not copied
    # in one place instead of dropping the result of each copy.
    if failed_tags:
        logger.info(
            f"Failed to back up {len(failed_tags)} of "
            f"{len(docker_hub_tags)} tags: {failed_tags}"
        )


def _write_to_file(file_path: str, content: List[str]) -> None:
    """
    Write the given lines to a file inside the Bazel workspace directory.

    Args:
        file_path: Path of the file, relative to the Bazel workspace directory.
        content: Lines to write; they are joined with newlines.
    """
    text = "\n".join(content)
    file_path = os.path.join(bazel_workspace_dir, file_path)
    logger.info(f"Writing to {file_path}......")
    with open(file_path, "w") as out_file:
        out_file.write(text)
88 changes: 88 additions & 0 deletions ci/ray_ci/automation/test_docker_tags_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
_get_docker_auth_token,
_get_docker_hub_auth_token,
_get_image_creation_time,
backup_release_tags,
copy_tag_to_aws_ecr,
delete_tag,
_list_recent_commit_short_shas,
query_tags_from_docker_hub,
query_tags_from_docker_with_oci,
_is_release_tag,
AuthTokenException,
RetrieveImageConfigException,
DockerHubRateLimitException,
Expand Down Expand Up @@ -214,6 +217,56 @@ def test_delete_tag_failure_rate_limit_exceeded(mock_requests):
}


@pytest.mark.parametrize(
    ("tag", "release_versions", "expected_value"),
    [
        ("2.0.0", ["2.0.0"], True),
        ("2.0.0rc0", ["2.0.0rc0"], True),
        ("2.0.0-py38", ["2.0.0"], True),
        ("2.0.0-py38-cu123", ["2.0.0"], True),
        ("2.0.0.post1", ["2.0.0.post1"], True),
        ("2.0.0.1", ["2.0.0"], False),
        ("2.0.0.1r", ["2.0.0"], False),
        ("a.1.c", ["2.0.0"], False),
        ("1.a.b", ["2.0.0"], False),
        ("2.0.0rc0", ["2.0.0"], False),
        ("2.0.0", ["2.0.0rc0"], False),
        ("2.0.0.a1s2d3", ["2.0.0"], False),
        ("2.0.0.a1s2d3-py38-cu123", ["2.0.0"], False),
        ("2.0.0", None, True),
    ],
)
def test_is_release_tag(tag, release_versions, expected_value):
    """_is_release_tag classifies each tag as expected for the given versions."""
    is_release = _is_release_tag(tag, release_versions)
    assert is_release == expected_value


@mock.patch("ci.ray_ci.automation.docker_tags_lib._call_crane_cp")
def test_copy_tag_to_aws_ecr(mock_call_crane_cp):
    """A zero return code from the crane call is reported as a successful copy."""
    source_tag = "test_namespace/test_repository:test_tag"
    ecr_repo = "aws-ecr/name/repo"
    crane_output = (
        "aws-ecr/name/repo:test_tag: digest: sha256:sample-sha256 size: 1788"
    )
    mock_call_crane_cp.return_value = (0, crane_output)

    result = copy_tag_to_aws_ecr(source_tag, ecr_repo)

    mock_call_crane_cp.assert_called_once_with(
        tag="test_tag", source=source_tag, aws_ecr_repo=ecr_repo
    )
    assert result is True


@mock.patch("ci.ray_ci.automation.docker_tags_lib._call_crane_cp")
def test_copy_tag_to_aws_ecr_failure(mock_call_crane_cp):
    """A non-zero return code from the crane call is reported as a failed copy."""
    source_tag = "test_namespace/test_repository:test_tag"
    ecr_repo = "aws-ecr/name/repo"
    mock_call_crane_cp.return_value = (1, "Error: Failed to copy tag.")

    result = copy_tag_to_aws_ecr(source_tag, ecr_repo)

    mock_call_crane_cp.assert_called_once_with(
        tag="test_tag", source=source_tag, aws_ecr_repo=ecr_repo
    )
    assert result is False


def _make_docker_hub_response(
tag: str, page_count: int, namespace: str, repository: str, page_limit: int
):
Expand Down Expand Up @@ -341,5 +394,40 @@ def test_query_tags_from_docker_with_oci_failure(mock_requests, mock_get_token):
query_tags_from_docker_with_oci("test_namespace", "test_repo")


@mock.patch("ci.ray_ci.automation.docker_tags_lib._get_docker_hub_auth_token")
@mock.patch("ci.ray_ci.automation.docker_tags_lib.query_tags_from_docker_hub")
@mock.patch("ci.ray_ci.automation.docker_tags_lib._write_to_file")
@mock.patch("ci.ray_ci.automation.docker_tags_lib.copy_tag_to_aws_ecr")
def test_backup_release_tags(
    mock_copy_tag, mock_write, mock_query_tags, mock_get_token
):
    """Every tag returned by the Docker Hub query is written out and copied."""
    namespace = "test_namespace"
    repository = "test_repository"
    aws_ecr_repo = "test_aws_ecr_repo"
    expected_tags = [f"{namespace}/{repository}:2.0.{i}" for i in range(10)]

    mock_get_token.return_value = "test_token"
    mock_query_tags.return_value = list(expected_tags)

    backup_release_tags(
        namespace=namespace,
        repository=repository,
        aws_ecr_repo=aws_ecr_repo,
        docker_username="test_username",
        docker_password="test_password",
    )

    # The Docker Hub query runs once, with the token from the auth helper.
    assert mock_query_tags.call_count == 1
    query_kwargs = mock_query_tags.call_args.kwargs
    assert query_kwargs["namespace"] == namespace
    assert query_kwargs["repository"] == repository
    assert query_kwargs["docker_hub_token"] == "test_token"

    assert mock_write.call_count == 1

    # One copy per tag, in the order the query returned them.
    assert mock_copy_tag.call_count == 10
    for copy_call, expected_tag in zip(mock_copy_tag.call_args_list, expected_tags):
        assert copy_call.kwargs["aws_ecr_repo"] == aws_ecr_repo
        assert copy_call.kwargs["tag"] == expected_tag


# Run this file's tests verbosely when it is executed as a script, and use the
# pytest result as the process exit code.
if __name__ == "__main__":
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)

0 comments on commit 72de08c

Please sign in to comment.