Skip to content

Commit

Permalink
[runtime env] Add environment variable to skip parsing .gitignore f…
Browse files Browse the repository at this point in the history
…ile into `"excludes"` (ray-project#33149)

Currently, if a user specifies a local working_dir, any .gitignore file in the directory will be parsed and the specified files will be excluded from the uploaded working_dir package. This is an issue if the user wants to have some files in .gitignore (e.g. because they are very large) but still wants to upload them as part of the working_dir. Previously, there was no way to override this behavior.

This PR adds an environment variable, RAY_RUNTIME_ENV_IGNORE_GITIGNORE, which, if set to 1, skips all parsing of .gitignore files.
  • Loading branch information
architkulkarni authored Mar 11, 2023
1 parent f20d484 commit c3204af
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 4 deletions.
4 changes: 2 additions & 2 deletions doc/source/ray-core/handling-dependencies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting a local directory per-task or per-actor is currently unsupported; it can only be set per-job (i.e., in ``ray.init()``).

Note: If your local directory contains a ``.gitignore`` file, the files and paths specified therein will not be uploaded to the cluster.
Note: If the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.

- ``py_modules`` (List[str|module]): Specifies Python modules to be available for import in the Ray workers. (For more ways to specify packages, see also the ``pip`` and ``conda`` fields below.)
Each entry must be either (1) a path to a local directory, (2) a URI to a remote zip file (see :ref:`remote-uris` for details), (3) a Python module object, or (4) a path to a local `.whl` file.
Expand All @@ -325,7 +325,7 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting options (1) and (3) per-task or per-actor is currently unsupported, it can only be set per-job (i.e., in ``ray.init()``).

Note: For option (1), if your local directory contains a ``.gitignore`` file, the files and paths specified therein will not be uploaded to the cluster.
Note: For option (1), if the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.

Note: This feature is currently limited to modules that are packages with a single directory containing an ``__init__.py`` file. For single-file modules, you may use ``working_dir``.

Expand Down
3 changes: 3 additions & 0 deletions python/ray/_private/ray_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def env_set_by_user(key):
# the local working_dir and py_modules to be uploaded, or these files might get
# garbage collected before the job starts.
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_DEFAULT = 10 * 60
# If set to 1, then `.gitignore` files will not be parsed and loaded into "excludes"
# when using a local working_dir or py_modules.
RAY_RUNTIME_ENV_IGNORE_GITIGNORE = "RAY_RUNTIME_ENV_IGNORE_GITIGNORE"
RAY_STORAGE_ENVIRONMENT_VARIABLE = "RAY_STORAGE"
# Hook for running a user-specified runtime-env hook. This hook will be called
# unconditionally given the runtime_env dict passed for ray.init. It must return
Expand Down
16 changes: 16 additions & 0 deletions python/ray/_private/runtime_env/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ray._private.ray_constants import (
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_DEFAULT,
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_ENV_VAR,
RAY_RUNTIME_ENV_IGNORE_GITIGNORE,
)
from ray._private.gcs_utils import GcsAioClient
from ray._private.thirdparty.pathspec import PathSpec
Expand Down Expand Up @@ -246,6 +247,21 @@ def match(p: Path):


def _get_gitignore(path: Path) -> Optional[Callable]:
"""Returns a function that returns True if the path should be excluded.
Returns None if there is no .gitignore file in the path, or if the
RAY_RUNTIME_ENV_IGNORE_GITIGNORE environment variable is set to 1.
Args:
path: The path to the directory to check for a .gitignore file.
Returns:
A function that returns True if the path should be excluded.
"""
ignore_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") == "1"
if ignore_gitignore:
return None

path = path.absolute()
ignore_file = path / ".gitignore"
if ignore_file.is_file():
Expand Down
33 changes: 31 additions & 2 deletions python/ray/tests/test_runtime_env_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
import pytest

from ray._private.gcs_utils import GcsClient
from ray._private.ray_constants import KV_NAMESPACE_PACKAGE
from ray._private.ray_constants import (
KV_NAMESPACE_PACKAGE,
RAY_RUNTIME_ENV_IGNORE_GITIGNORE,
)
from ray._private.runtime_env.packaging import (
GCS_STORAGE_MAX_SIZE,
MAC_OS_ZIP_HIDDEN_DIR_NAME,
Expand All @@ -30,6 +33,7 @@
remove_dir_from_filepaths,
unzip_package,
upload_package_if_needed,
_get_gitignore,
)
from ray.experimental.internal_kv import (
_initialize_internal_kv,
Expand Down Expand Up @@ -510,14 +514,27 @@ def test_parse_gcs_uri(self, gcs_uri):
assert package_name == gcs_uri.split("/")[-1]


def test_get_gitignore(tmp_path, monkeypatch):
    """Check that ``_get_gitignore`` builds a matcher from a ``.gitignore`` file.

    A ``.gitignore`` containing ``*.pyc`` is written into ``tmp_path``; the
    callable returned by ``_get_gitignore`` must report ``foo.pyc`` as
    excluded and ``foo.py`` as not excluded.
    """
    # _get_gitignore returns None when RAY_RUNTIME_ENV_IGNORE_GITIGNORE is "1",
    # so clear any value inherited from the environment running the tests.
    monkeypatch.delenv(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, raising=False)

    gitignore_path = tmp_path / ".gitignore"
    gitignore_path.write_text("*.pyc")

    is_excluded = _get_gitignore(tmp_path)
    assert is_excluded(tmp_path / "foo.pyc") is True
    assert is_excluded(tmp_path / "foo.py") is False


@pytest.mark.parametrize("ignore_gitignore", [True, False])
@pytest.mark.skipif(sys.platform == "win32", reason="Fails on windows")
def test_travel(tmp_path):
def test_travel(tmp_path, ignore_gitignore, monkeypatch):
dir_paths = set()
file_paths = set()
item_num = 0
excludes = []
root = tmp_path / "test"

if ignore_gitignore:
monkeypatch.setenv(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "1")
else:
monkeypatch.delenv(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, raising=False)

def construct(path, excluded=False, depth=0):
nonlocal item_num
path.mkdir(parents=True)
Expand Down Expand Up @@ -553,6 +570,18 @@ def construct(path, excluded=False, depth=0):
file_paths.add((str(path / uid), str(v)))
item_num += 1

# Add gitignore file
gitignore = root / ".gitignore"
gitignore.write_text("*.pyc")
file_paths.add((str(gitignore), "*.pyc"))

# Add file that should be ignored by gitignore
with (root / "foo.pyc").open("w") as f:
f.write("foo")
if ignore_gitignore:
# If ignore_gitignore is True, then the file should be visited
file_paths.add((str(root / "foo.pyc"), "foo"))

construct(root)
exclude_spec = _get_excludes(root, excludes)
visited_dir_paths = set()
Expand Down

0 comments on commit c3204af

Please sign in to comment.