Skip to content

Commit

Permalink
[Air][Data] Don't promote locality_hints for split (ray-project#26647)
Browse files Browse the repository at this point in the history
Why are these changes needed?
Since locality_hints is an experimental feature, we stop promoting it in the docs and no longer enable it in AIR. See ray-project#26641 for more context.
  • Loading branch information
scv119 authored Jul 18, 2022
1 parent 5ce06ce commit b20f5f5
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 8 deletions.
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/accessing_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def train(self, shard: ray.data.Dataset[int]) -> int:
ds = ray.data.range(10000)
# -> Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>),
# Dataset(num_blocks=13, num_rows=2500, schema=<class 'int'>), ...]

Expand Down
2 changes: 1 addition & 1 deletion doc/source/data/doc_code/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def train(self, shard) -> int:
workers = [Worker.remote(i) for i in range(4)]
# -> [Actor(Worker, ...), Actor(Worker, ...), ...]

shards = ds.split(n=4, locality_hints=workers)
shards = ds.split(n=4)
# -> [
# Dataset(num_blocks=3, num_rows=45,
# schema={sepal.length: double, sepal.width: double,
Expand Down
4 changes: 2 additions & 2 deletions doc/source/data/examples/nyc_taxi_basic_processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,7 @@
"id": "8b10fc64",
"metadata": {},
"source": [
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size, and providing the trainer actor handles to Ray Datasets as locality hints, so Datasets can try to colocate shard data with trainers in order to decrease data movement."
"Next, we split the dataset into ``len(trainers)`` shards, ensuring that the shards are of equal size."
]
},
{
Expand All @@ -884,7 +884,7 @@
}
],
"source": [
"shards = ds.split(n=len(trainers), equal=True, locality_hints=trainers)\n",
"shards = ds.split(n=len(trainers), equal=True)\n",
"shards"
]
},
Expand Down
5 changes: 1 addition & 4 deletions python/ray/train/_internal/dataset_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ class RayDatasetSpec:
training workers (to use as locality hints). The Callable is expected to
return a list of RayDatasets or a list of dictionaries of RayDatasets,
with the length of the list equal to the length of the list of actor handles.
If None is provided, the provided Ray Dataset(s) will be simply be split using
the actor handles as locality hints.
If None is provided, the provided Ray Dataset(s) will be equally split.
"""

Expand All @@ -48,7 +47,6 @@ def split_dataset(dataset_or_pipeline):
return dataset_or_pipeline.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)

if isinstance(self.dataset_or_dict, dict):
Expand Down Expand Up @@ -209,7 +207,6 @@ def get_dataset_shards(
dataset_splits = dataset.split(
len(training_worker_handles),
equal=True,
locality_hints=training_worker_handles,
)
else:
dataset_splits = [dataset] * len(training_worker_handles)
Expand Down

0 comments on commit b20f5f5

Please sign in to comment.