Skip to content

Commit

Permalink
Merge pull request #221 from dscran/master
Browse files (browse the repository at this point in the history)
 Add a simple filter interface to RunDirectory
  • Loading branch information
takluyver authored Oct 28, 2019
2 parents 26cc5fb + df3deae commit f3d5637
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
15 changes: 10 additions & 5 deletions karabo_data/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,7 +1288,7 @@ def H5File(path):
return DataCollection.from_path(path)


def RunDirectory(path):
def RunDirectory(path, include='*'):
"""Open data files from a 'run' at European XFEL.
::
Expand All @@ -1304,10 +1304,13 @@ def RunDirectory(path):
----------
path: str
Path to the run directory containing HDF5 files.
include: str
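Wildcard (fnmatch-style) pattern matched against the names of the HDF5 files in the run directory, e.g. '*DA*'. The default '*' includes every file.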
"""
files = [osp.join(path, f) for f in os.listdir(path) if f.endswith('.h5')]
files = [f for f in os.listdir(path) if f.endswith('.h5')]
files = [osp.join(path, f) for f in fnmatch.filter(files, include)]
if not files:
raise Exception("No HDF5 files found in {}".format(path))
raise Exception("No HDF5 files found in {} with glob pattern {}".format(path, include))
return DataCollection.from_paths(files)


Expand All @@ -1316,7 +1319,7 @@ def RunDirectory(path):
RunHandler = RunDirectory


def open_run(proposal, run, data='raw'):
def open_run(proposal, run, data='raw', include='*'):
"""Access EuXFEL data on the Maxwell cluster by proposal and run number.
::
Expand All @@ -1335,6 +1338,8 @@ def open_run(proposal, run, data='raw'):
data: str
'raw' or 'proc' (processed) to access data from one of those folders.
The default is 'raw'.
include: str
Wildcard (fnmatch-style) pattern used to filter data files by name; passed through to RunDirectory. The default '*' includes every file.
"""
if isinstance(proposal, int):
proposal = 'p{:06d}'.format(proposal)
Expand All @@ -1348,4 +1353,4 @@ def open_run(proposal, run, data='raw'):
elif not run.startswith('r'):
run = 'r' + run.rjust(4, '0')

return RunDirectory(osp.join(prop_dir, data, run))
return RunDirectory(osp.join(prop_dir, data, run), include=include)
10 changes: 10 additions & 0 deletions karabo_data/tests/test_reader_mockdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ def test_read_fxe_raw_run(mock_fxe_raw_run):
assert run.train_ids == list(range(10000, 10480))
run.info() # Smoke test

def test_read_fxe_raw_run_selective(mock_fxe_raw_run):
    """Check that ``include`` limits which sources a run exposes.

    The same mock run is opened twice with different wildcard patterns.
    Both selections must report the full train ID range, while the XGM
    control source and the LPD detector source each appear for only one
    of the two patterns.
    """
    expected_trains = list(range(10000, 10480))
    xgm_source = 'SA1_XTD2_XGM/DOOCS/MAIN'
    lpd_source = 'FXE_DET_LPD1M-1/DET/0CH0:xtdf'

    # Pattern '*DA*': XGM control data present, LPD detector data absent.
    da_only = RunDirectory(mock_fxe_raw_run, include='*DA*')
    assert da_only.train_ids == expected_trains
    assert xgm_source in da_only.control_sources
    assert lpd_source not in da_only.detector_sources

    # Pattern '*LPD*': the reverse, detector data present and XGM absent.
    lpd_only = RunDirectory(mock_fxe_raw_run, include='*LPD*')
    assert lpd_only.train_ids == expected_trains
    assert xgm_source not in lpd_only.control_sources
    assert lpd_source in lpd_only.detector_sources

def test_read_spb_proc_run(mock_spb_proc_run):
run = RunDirectory(mock_spb_proc_run) #Test for calib data
assert len(run.files) == 16 # only 16 detector modules for calib data
Expand Down

0 comments on commit f3d5637

Please sign in to comment.