Skip to content

Commit

Permalink
feat(evaluate): allow loading local datasets for evaluation (#362)
Browse files Browse the repository at this point in the history
  • Loading branch information
akotyla authored Feb 14, 2025
1 parent 908f943 commit 20b3db2
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
2 changes: 2 additions & 0 deletions packages/ragbits-evaluate/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Unreleased

- Add local data loader (#334).

## 0.8.0 (2025-01-29)

### Changed
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from typing import TypeAlias

from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, load_dataset

from .base import DataLoader

# Union of the dataset container types imported from the `datasets` package above.
HFData: TypeAlias = DatasetDict | Dataset | IterableDatasetDict | IterableDataset


class LocalDataLoader(DataLoader[DatasetDict]):
    """
    Local data loader.

    Loads data from local files by delegating to `datasets.load_dataset` with one
    of the builder names listed in `AVAILABLE_BUILDERS`.
    """

    # Builder names accepted by the constructor; any other name is rejected up front.
    AVAILABLE_BUILDERS = {
        "json",
        "csv",
        "parquet",
        "arrow",
        "text",
        "xml",
        "webdataset",
        "imagefolder",
        "audiofolder",
        "videofolder",
    }

    def __init__(self, path: str, split: str, builder: str) -> None:
        """
        Initialize the local data loader.

        Args:
            path: Value passed to `load_dataset` as `data_files`.
            split: Value passed to `load_dataset` as `split`.
            builder: Builder name passed to `load_dataset` as its first argument;
                must be one of `AVAILABLE_BUILDERS`.

        Raises:
            ValueError: If `builder` is not one of `AVAILABLE_BUILDERS`.
        """
        self.path = path
        self.split = split
        self.builder = builder

        if self.builder not in self.AVAILABLE_BUILDERS:
            # Sort the names: iteration order of a set of strings is not stable
            # across interpreter runs, which would make the message non-deterministic.
            available = ", ".join(sorted(self.AVAILABLE_BUILDERS))
            raise ValueError(f"Unsupported builder '{self.builder}'. Available builders: {available}")

    async def load(self) -> DatasetDict:
        """
        Load the data from the local file.

        Returns:
            The loaded data.
        """
        # NOTE(review): with an explicit `split`, `load_dataset` is documented to return a
        # single `Dataset` rather than a `DatasetDict` - confirm the annotation against callers.
        return load_dataset(self.builder, data_files=self.path, split=self.split)

0 comments on commit 20b3db2

Please sign in to comment.