Skip to content

Commit

Permalink
Use Path in raw.filenames and epochs.filename (mne-tools#12843)
Browse files Browse the repository at this point in the history
Co-authored-by: Eric Larson <[email protected]>
  • Loading branch information
Mathieu Scheltienne and larsoner authored Sep 25, 2024
1 parent 381688f commit 10ff91a
Show file tree
Hide file tree
Showing 45 changed files with 422 additions and 319 deletions.
3 changes: 3 additions & 0 deletions doc/changes/devel/12843.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed a bug where split FIF files that were read and then appended to other
:class:`mne.io.Raw` instances had their ``BAD boundary`` annotations incorrectly offset
in samples by the number of split files, by `Eric Larson`_.
1 change: 1 addition & 0 deletions doc/changes/devel/12843.other.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve handling of filenames in ``raw.filenames`` and ``epochs.filename`` by using :class:`~pathlib.Path` instead of :class:`str`, by `Mathieu Scheltienne`_.
43 changes: 22 additions & 21 deletions mne/_fiff/open.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,16 @@
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import os.path as op
from gzip import GzipFile
from io import SEEK_SET, BytesIO
from pathlib import Path

import numpy as np
from scipy.sparse import issparse

from ..utils import _file_like, logger, verbose, warn
from ..utils import _check_fname, _file_like, _validate_type, logger, verbose, warn
from .constants import FIFF
from .tag import (
Tag,
_call_dict_names,
_matrix_info,
_read_tag_header,
read_tag,
)
from .tag import Tag, _call_dict_names, _matrix_info, _read_tag_header, read_tag
from .tree import dir_tree_find, make_dir_tree


Expand Down Expand Up @@ -46,12 +40,13 @@ def read(self, size=-1):
def _fiff_get_fid(fname):
"""Open a FIF file with no additional parsing."""
if _file_like(fname):
logger.debug("Using file-like I/O")
fid = _NoCloseRead(fname)
fid.seek(0)
else:
fname = str(fname)
if op.splitext(fname)[1].lower() == ".gz":
logger.debug("Using gzip")
_validate_type(fname, Path, "fname", extra="or file-like")
if fname.suffixes[-1] == ".gz":
logger.debug("Using gzip I/O")
fid = GzipFile(fname, "rb") # Open in binary mode
else:
logger.debug("Using normal I/O")
Expand All @@ -61,6 +56,7 @@ def _fiff_get_fid(fname):

def _get_next_fname(fid, fname, tree):
"""Get the next filename in split files."""
_validate_type(fname, (Path, None), "fname")
nodes_list = dir_tree_find(tree, FIFF.FIFFB_REF)
next_fname = None
for nodes in nodes_list:
Expand All @@ -72,16 +68,21 @@ def _get_next_fname(fid, fname, tree):
if role != FIFF.FIFFV_ROLE_NEXT_FILE:
next_fname = None
break
if ent.kind not in (FIFF.FIFF_REF_FILE_NAME, FIFF.FIFF_REF_FILE_NUM):
continue
# If we can't resolve it, assume/hope it's in the current directory
if fname is None:
fname = Path().resolve()
if ent.kind == FIFF.FIFF_REF_FILE_NAME:
tag = read_tag(fid, ent.pos)
next_fname = op.join(op.dirname(fname), tag.data)
next_fname = fname.parent / tag.data
if ent.kind == FIFF.FIFF_REF_FILE_NUM:
# Some files don't have the name, just the number. So
# we construct the name from the current name.
if next_fname is not None:
continue
next_num = read_tag(fid, ent.pos).data.item()
path, base = op.split(fname)
base = fname.name
idx = base.find(".")
idx2 = base.rfind("-")
num_str = base[idx2 + 1 : idx]
Expand All @@ -90,14 +91,13 @@ def _get_next_fname(fid, fname, tree):

if idx2 < 0 and next_num == 1:
# this is the first file, which may not be numbered
next_fname = op.join(
path,
f"{base[:idx]}-{next_num:d}.{base[idx + 1 :]}",
next_fname = (
fname.parent / f"{base[:idx]}-{next_num:d}.{base[idx + 1 :]}"
)
continue

next_fname = op.join(
path, f"{base[:idx2]}-{next_num:d}.{base[idx + 1 :]}"
next_fname = (
fname.parent / f"{base[:idx2]}-{next_num:d}.{base[idx + 1 :]}"
)
if next_fname is not None:
break
Expand Down Expand Up @@ -191,7 +191,7 @@ def _fiff_open(fname, fid, preload):
pos = tag.next_pos
directory.append(tag)

tree, _ = make_dir_tree(fid, directory)
tree, _ = make_dir_tree(fid, directory, indent=1)

logger.debug("[done]")

Expand Down Expand Up @@ -247,7 +247,8 @@ def show_fiff(
raise ValueError("output must be list or str")
if isinstance(tag, str): # command mne show_fiff passes string
tag = int(tag)
f, tree, directory = fiff_open(fname)
fname = _check_fname(fname, "read", True)
f, tree, _ = fiff_open(fname)
# This gets set to 0 (unknown) by fiff_open, but FIFFB_ROOT probably
# makes more sense for display
tree["block"] = FIFF.FIFFB_ROOT
Expand Down
13 changes: 9 additions & 4 deletions mne/_fiff/tests/test_what.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_what(tmp_path, verbose_debug):
"""Test mne.what."""
pytest.importorskip("sklearn")
# ICA
ica = ICA(max_iter=1)
ica = ICA(max_iter=1, random_state=0)
raw = RawArray(np.random.RandomState(0).randn(3, 10), create_info(3, 1000.0, "eeg"))
with _record_warnings(): # convergence sometimes
ica.fit(raw)
Expand All @@ -33,30 +33,35 @@ def test_what(tmp_path, verbose_debug):
# test files
fnames = glob.glob(str(data_path / "MEG" / "sample" / "*.fif"))
fnames += glob.glob(str(data_path / "subjects" / "sample" / "bem" / "*.fif"))
fnames += [str(fname)]
fnames = sorted(fnames)
want_dict = dict(
eve="events",
ave="evoked",
cov="cov",
ica="ica",
inv="inverse",
fwd="forward",
trans="transform",
proj="proj",
raw="raw",
meg="raw",
sol="bem solution",
bem="bem surfaces",
src="src",
dense="bem surfaces",
sparse="bem surfaces",
head="bem surfaces",
fiducials="fiducials",
)
got = set()
for fname in fnames:
print(fname)
kind = Path(fname).stem.split("-")[-1]
if len(kind) > 5:
kind = kind.split("_")[-1]
this = what(fname)
assert this == want_dict[kind]
assert this == want_dict[kind], fname
print()
got.add(kind)
assert set(want_dict) == got
fname = data_path / "MEG" / "sample" / "sample_audvis-ave_xfit.dip"
assert what(fname) == "unknown"
15 changes: 9 additions & 6 deletions mne/_fiff/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
else:
block = 0

logger.debug(" " * indent + f"start {{ {block}")
start_separate = False

this = start

Expand All @@ -64,6 +64,9 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
while this < len(directory):
if directory[this].kind == FIFF.FIFF_BLOCK_START:
if this != start:
if not start_separate:
start_separate = True
logger.debug(" " * indent + f"start {{ {block}")
child, this = make_dir_tree(fid, directory, this, indent + 1)
tree["nchild"] += 1
tree["children"].append(child)
Expand Down Expand Up @@ -96,10 +99,10 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
if tree["nent"] == 0:
tree["directory"] = None

logger.debug(
" " * (indent + 1)
+ f"block = {tree['block']} nent = {tree['nent']} nchild = {tree['nchild']}"
)
logger.debug(" " * indent + f"end }} {block:d}")
content = f"block = {tree['block']} nent = {tree['nent']} nchild = {tree['nchild']}"
if start_separate:
logger.debug(" " * indent + f"end }} {content}")
else:
logger.debug(" " * indent + content)
last = this
return tree, last
2 changes: 1 addition & 1 deletion mne/_fiff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def _read_segments_file(
# Read up to 100 MB of data at a time, block_size is in data samples
block_size = ((int(100e6) // n_bytes) // n_channels) * n_channels
block_size = min(data_left, block_size)
with open(raw._filenames[fi], "rb", buffering=0) as fid:
with open(raw.filenames[fi], "rb", buffering=0) as fid:
fid.seek(data_offset)
# extract data in chunks
for sample_start in np.arange(0, data_left, block_size) // n_channels:
Expand Down
2 changes: 1 addition & 1 deletion mne/_fiff/what.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def what(fname):
from ..transforms import read_trans
from .meas_info import read_fiducials

_check_fname(fname, overwrite="read", must_exist=True)
fname = _check_fname(fname, overwrite="read", must_exist=True)
checks = OrderedDict()
checks["raw"] = read_raw_fif
checks["ica"] = read_ica
Expand Down
70 changes: 32 additions & 38 deletions mne/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Copyright the MNE-Python contributors.

import json
import os.path as op
import re
import warnings
from collections import Counter, OrderedDict
Expand Down Expand Up @@ -1194,48 +1193,43 @@ def read_annotations(
from .io.edf.edf import _read_annotations_edf
from .io.eeglab.eeglab import _read_annotations_eeglab

fname = str(
_check_fname(
fname,
overwrite="read",
must_exist=True,
need_dir=str(fname).endswith(".ds"), # for CTF
name="fname",
)
fname = _check_fname(
fname,
overwrite="read",
must_exist=True,
need_dir=str(fname).endswith(".ds"), # for CTF
name="fname",
)
name = op.basename(fname)
if name.endswith(("fif", "fif.gz")):
readers = {
".csv": _read_annotations_csv,
".cnt": _read_annotations_cnt,
".ds": _read_annotations_ctf,
".cef": _read_annotations_curry,
".set": _read_annotations_eeglab,
".edf": _read_annotations_edf,
".bdf": _read_annotations_edf,
".gdf": _read_annotations_edf,
".vmrk": _read_annotations_brainvision,
".amrk": _read_annotations_brainvision,
".txt": _read_annotations_txt,
}
kwargs = {
".vmrk": {"sfreq": sfreq, "ignore_marker_types": ignore_marker_types},
".amrk": {"sfreq": sfreq, "ignore_marker_types": ignore_marker_types},
".cef": {"sfreq": sfreq},
".set": {"uint16_codec": uint16_codec},
".edf": {"encoding": encoding},
".bdf": {"encoding": encoding},
".gdf": {"encoding": encoding},
}
if fname.suffix in readers:
annotations = readers[fname.suffix](fname, **kwargs.get(fname.suffix, {}))
elif fname.name.endswith(("fif", "fif.gz")):
# Read FiF files
ff, tree, _ = fiff_open(fname, preload=False)
with ff as fid:
annotations = _read_annotations_fif(fid, tree)
elif name.endswith("txt"):
annotations = _read_annotations_txt(fname)

elif name.endswith(("vmrk", "amrk")):
annotations = _read_annotations_brainvision(
fname, sfreq=sfreq, ignore_marker_types=ignore_marker_types
)

elif name.endswith("csv"):
annotations = _read_annotations_csv(fname)

elif name.endswith("cnt"):
annotations = _read_annotations_cnt(fname)

elif name.endswith("ds"):
annotations = _read_annotations_ctf(fname)

elif name.endswith("cef"):
annotations = _read_annotations_curry(fname, sfreq=sfreq)

elif name.endswith("set"):
annotations = _read_annotations_eeglab(fname, uint16_codec=uint16_codec)

elif name.endswith(("edf", "bdf", "gdf")):
annotations = _read_annotations_edf(fname, encoding=encoding)

elif name.startswith("events_") and fname.endswith("mat"):
elif fname.name.startswith("events_") and fname.suffix == ".mat":
annotations = _read_brainstorm_annotations(fname)
else:
raise OSError(f'Unknown annotation file format "{fname}"')
Expand Down
2 changes: 1 addition & 1 deletion mne/channels/montage.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ def read_dig_fif(fname):
read_dig_localite
make_dig_montage
"""
_check_fname(fname, overwrite="read", must_exist=True)
fname = _check_fname(fname, overwrite="read", must_exist=True)
# Load the dig data
f, tree = fiff_open(fname)[:2]
with f as fid:
Expand Down
2 changes: 1 addition & 1 deletion mne/chpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def extract_chpi_locs_kit(raw, stim_channel="MISC 064", *, verbose=None):
dtype = np.dtype([("good", "<u4"), ("data", "<f8", (4,))])
assert dtype.itemsize == header["size"], (dtype.itemsize, header["size"])
all_data = list()
for fname in raw._filenames:
for fname in raw.filenames:
with open(fname) as fid:
fid.seek(header["offset"])
all_data.append(
Expand Down
2 changes: 1 addition & 1 deletion mne/commands/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_make_scalp_surfaces(tmp_path, monkeypatch):
dense_fname = op.join(subj_dir, "sample-head-dense.fif")
medium_fname = op.join(subj_dir, "sample-head-medium.fif")
with ArgvSetter(cmd, disable_stdout=False, disable_stderr=False):
monkeypatch.delenv("FREESURFER_HOME")
monkeypatch.delenv("FREESURFER_HOME", raising=False)
with pytest.raises(RuntimeError, match="The FreeSurfer environ"):
mne_make_scalp_surfaces.run()
shutil.copy(op.join(surf_path, "lh.seghead"), surf_path_new)
Expand Down
21 changes: 15 additions & 6 deletions mne/epochs.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ class BaseEpochs(
.. versionadded:: 0.16
%(drop_log)s
filename : str | None
filename : Path | None
The filename (if the epochs are read from disk).
%(metadata_epochs)s
Expand Down Expand Up @@ -683,7 +683,7 @@ def __init__(
# more memory safe in most instances
for ii, epoch in enumerate(self._data):
self._data[ii] = np.dot(self._projector, epoch)
self._filename = str(filename) if filename is not None else filename
self.filename = filename if filename is not None else filename
if raw_sfreq is None:
raw_sfreq = self.info["sfreq"]
self._raw_sfreq = raw_sfreq
Expand Down Expand Up @@ -2013,10 +2013,19 @@ def apply_function(
return self

@property
def filename(self):
"""The filename."""
def filename(self) -> Path | None:
"""The filename if the epochs are loaded from disk.
:type: :class:`pathlib.Path` | ``None``
"""
return self._filename

@filename.setter
def filename(self, value):
if value is not None:
value = _check_fname(value, overwrite="read", must_exist=True)
self._filename = value

def __repr__(self):
"""Build string representation."""
s = f"{len(self.events)} events "
Expand Down Expand Up @@ -4266,15 +4275,15 @@ def __init__(self, fname, proj=True, preload=True, verbose=None):
filetype="epochs",
endings=("-epo.fif", "-epo.fif.gz", "_epo.fif", "_epo.fif.gz"),
)
fname = str(_check_fname(fname=fname, must_exist=True, overwrite="read"))
fname = _check_fname(fname=fname, must_exist=True, overwrite="read")
elif not preload:
raise ValueError("preload must be used with file-like objects")

fnames = [fname]
fname_rep = _get_fname_rep(fname)
ep_list = list()
raw = list()
for fname in fnames:
fname_rep = _get_fname_rep(fname)
logger.info(f"Reading {fname_rep} ...")
fid, tree, _ = fiff_open(fname, preload=preload)
next_fname = _get_next_fname(fid, fname, tree)
Expand Down
Loading

0 comments on commit 10ff91a

Please sign in to comment.