Skip to content

Commit

Permalink
fix: corrected shard path
Browse files Browse the repository at this point in the history
  • Loading branch information
sohamtiwari3120 committed Nov 3, 2024
1 parent 09e4ec1 commit 161b6bc
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion train_gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def __init__(self, B, T, process_rank: int, num_processes: int, split: str) -> N
self.split = split
data_root = os.path.join(os.path.dirname(__file__), 'edu_fineweb10')
shards = os.listdir(data_root)
- shards = [shard for shard in shards if split in shard]
+ shards = [os.path.join(data_root, shard) for shard in shards if split in shard]
assert len(shards) > 0, f"No shards found for split: {split}"
self.shards = shards
self.starting_position = self.B * self.T * self.process_rank
Expand Down

0 comments on commit 161b6bc

Please sign in to comment.