Use Path in raw.filenames and epochs.filename (mne-tools#12843)

Co-authored-by: Eric Larson <[email protected]>
minajamshidi · Sep 25, 2024 · 10ff91a · 10ff91a
1 parent 381688f
commit 10ff91a
Show file tree

Hide file tree

Showing 45 changed files with 422 additions and 319 deletions.
diff --git a/doc/changes/devel/12843.bugfix.rst b/doc/changes/devel/12843.bugfix.rst
@@ -0,0 +1,3 @@
+Fixed a bug where split FIF files that were read and then appended to other
+:class:`mne.io.Raw` instances had their ``BAD boundary`` annotations incorrectly offset
+in samples by the number of split files, by `Eric Larson`_.
diff --git a/doc/changes/devel/12843.other.rst b/doc/changes/devel/12843.other.rst
@@ -0,0 +1 @@
+Improve handling of filenames in ``raw.filenames`` and ``epochs.filename`` by using :class:`~pathlib.Path` instead of :class:`str`, by `Mathieu Scheltienne`_.
diff --git a/mne/_fiff/open.py b/mne/_fiff/open.py
@@ -2,22 +2,16 @@
 # License: BSD-3-Clause
 # Copyright the MNE-Python contributors.
 
-import os.path as op
 from gzip import GzipFile
 from io import SEEK_SET, BytesIO
+from pathlib import Path
 
 import numpy as np
 from scipy.sparse import issparse
 
-from ..utils import _file_like, logger, verbose, warn
+from ..utils import _check_fname, _file_like, _validate_type, logger, verbose, warn
 from .constants import FIFF
-from .tag import (
-    Tag,
-    _call_dict_names,
-    _matrix_info,
-    _read_tag_header,
-    read_tag,
-)
+from .tag import Tag, _call_dict_names, _matrix_info, _read_tag_header, read_tag
 from .tree import dir_tree_find, make_dir_tree
 
 
@@ -46,12 +40,13 @@ def read(self, size=-1):
 def _fiff_get_fid(fname):
     """Open a FIF file with no additional parsing."""
     if _file_like(fname):
+        logger.debug("Using file-like I/O")
         fid = _NoCloseRead(fname)
         fid.seek(0)
     else:
-        fname = str(fname)
-        if op.splitext(fname)[1].lower() == ".gz":
-            logger.debug("Using gzip")
+        _validate_type(fname, Path, "fname", extra="or file-like")
+        if fname.suffixes[-1] == ".gz":
+            logger.debug("Using gzip I/O")
             fid = GzipFile(fname, "rb")  # Open in binary mode
         else:
             logger.debug("Using normal I/O")
@@ -61,6 +56,7 @@ def _fiff_get_fid(fname):
 
 def _get_next_fname(fid, fname, tree):
     """Get the next filename in split files."""
+    _validate_type(fname, (Path, None), "fname")
     nodes_list = dir_tree_find(tree, FIFF.FIFFB_REF)
     next_fname = None
     for nodes in nodes_list:
@@ -72,16 +68,21 @@ def _get_next_fname(fid, fname, tree):
                 if role != FIFF.FIFFV_ROLE_NEXT_FILE:
                     next_fname = None
                     break
+            if ent.kind not in (FIFF.FIFF_REF_FILE_NAME, FIFF.FIFF_REF_FILE_NUM):
+                continue
+            # If we can't resolve it, assume/hope it's in the current directory
+            if fname is None:
+                fname = Path().resolve()
             if ent.kind == FIFF.FIFF_REF_FILE_NAME:
                 tag = read_tag(fid, ent.pos)
-                next_fname = op.join(op.dirname(fname), tag.data)
+                next_fname = fname.parent / tag.data
             if ent.kind == FIFF.FIFF_REF_FILE_NUM:
                 # Some files don't have the name, just the number. So
                 # we construct the name from the current name.
                 if next_fname is not None:
                     continue
                 next_num = read_tag(fid, ent.pos).data.item()
-                path, base = op.split(fname)
+                base = fname.name
                 idx = base.find(".")
                 idx2 = base.rfind("-")
                 num_str = base[idx2 + 1 : idx]
@@ -90,14 +91,13 @@ def _get_next_fname(fid, fname, tree):
 
                 if idx2 < 0 and next_num == 1:
                     # this is the first file, which may not be numbered
-                    next_fname = op.join(
-                        path,
-                        f"{base[:idx]}-{next_num:d}.{base[idx + 1 :]}",
+                    next_fname = (
+                        fname.parent / f"{base[:idx]}-{next_num:d}.{base[idx + 1 :]}"
                     )
                     continue
 
-                next_fname = op.join(
-                    path, f"{base[:idx2]}-{next_num:d}.{base[idx + 1 :]}"
+                next_fname = (
+                    fname.parent / f"{base[:idx2]}-{next_num:d}.{base[idx + 1 :]}"
                 )
         if next_fname is not None:
             break
@@ -191,7 +191,7 @@ def _fiff_open(fname, fid, preload):
             pos = tag.next_pos
             directory.append(tag)
 
-    tree, _ = make_dir_tree(fid, directory)
+    tree, _ = make_dir_tree(fid, directory, indent=1)
 
     logger.debug("[done]")
 
@@ -247,7 +247,8 @@ def show_fiff(
         raise ValueError("output must be list or str")
     if isinstance(tag, str):  # command mne show_fiff passes string
         tag = int(tag)
-    f, tree, directory = fiff_open(fname)
+    fname = _check_fname(fname, "read", True)
+    f, tree, _ = fiff_open(fname)
     # This gets set to 0 (unknown) by fiff_open, but FIFFB_ROOT probably
     # makes more sense for display
     tree["block"] = FIFF.FIFFB_ROOT

diff --git a/mne/_fiff/tests/test_what.py b/mne/_fiff/tests/test_what.py
@@ -23,7 +23,7 @@ def test_what(tmp_path, verbose_debug):
     """Test mne.what."""
     pytest.importorskip("sklearn")
     # ICA
-    ica = ICA(max_iter=1)
+    ica = ICA(max_iter=1, random_state=0)
     raw = RawArray(np.random.RandomState(0).randn(3, 10), create_info(3, 1000.0, "eeg"))
     with _record_warnings():  # convergence sometimes
         ica.fit(raw)
@@ -33,30 +33,35 @@ def test_what(tmp_path, verbose_debug):
     # test files
     fnames = glob.glob(str(data_path / "MEG" / "sample" / "*.fif"))
     fnames += glob.glob(str(data_path / "subjects" / "sample" / "bem" / "*.fif"))
+    fnames += [str(fname)]
     fnames = sorted(fnames)
     want_dict = dict(
         eve="events",
         ave="evoked",
         cov="cov",
+        ica="ica",
         inv="inverse",
         fwd="forward",
         trans="transform",
         proj="proj",
         raw="raw",
-        meg="raw",
         sol="bem solution",
         bem="bem surfaces",
         src="src",
         dense="bem surfaces",
-        sparse="bem surfaces",
         head="bem surfaces",
         fiducials="fiducials",
     )
+    got = set()
     for fname in fnames:
+        print(fname)
         kind = Path(fname).stem.split("-")[-1]
         if len(kind) > 5:
             kind = kind.split("_")[-1]
         this = what(fname)
-        assert this == want_dict[kind]
+        assert this == want_dict[kind], fname
+        print()
+        got.add(kind)
+    assert set(want_dict) == got
     fname = data_path / "MEG" / "sample" / "sample_audvis-ave_xfit.dip"
     assert what(fname) == "unknown"
diff --git a/mne/_fiff/tree.py b/mne/_fiff/tree.py
@@ -48,7 +48,7 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
     else:
         block = 0
 
-    logger.debug("    " * indent + f"start {{ {block}")
+    start_separate = False
 
     this = start
 
@@ -64,6 +64,9 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
     while this < len(directory):
         if directory[this].kind == FIFF.FIFF_BLOCK_START:
             if this != start:
+                if not start_separate:
+                    start_separate = True
+                    logger.debug("    " * indent + f"start {{ {block}")
                 child, this = make_dir_tree(fid, directory, this, indent + 1)
                 tree["nchild"] += 1
                 tree["children"].append(child)
@@ -96,10 +99,10 @@ def make_dir_tree(fid, directory, start=0, indent=0, verbose=None):
     if tree["nent"] == 0:
         tree["directory"] = None
 
-    logger.debug(
-        "    " * (indent + 1)
-        + f"block = {tree['block']} nent = {tree['nent']} nchild = {tree['nchild']}"
-    )
-    logger.debug("    " * indent + f"end }} {block:d}")
+    content = f"block = {tree['block']} nent = {tree['nent']} nchild = {tree['nchild']}"
+    if start_separate:
+        logger.debug("    " * indent + f"end }} {content}")
+    else:
+        logger.debug("    " * indent + content)
     last = this
     return tree, last
diff --git a/mne/_fiff/utils.py b/mne/_fiff/utils.py
@@ -224,7 +224,7 @@ def _read_segments_file(
     # Read up to 100 MB of data at a time, block_size is in data samples
     block_size = ((int(100e6) // n_bytes) // n_channels) * n_channels
     block_size = min(data_left, block_size)
-    with open(raw._filenames[fi], "rb", buffering=0) as fid:
+    with open(raw.filenames[fi], "rb", buffering=0) as fid:
         fid.seek(data_offset)
         # extract data in chunks
         for sample_start in np.arange(0, data_left, block_size) // n_channels:

diff --git a/mne/_fiff/what.py b/mne/_fiff/what.py
@@ -39,7 +39,7 @@ def what(fname):
     from ..transforms import read_trans
     from .meas_info import read_fiducials
 
-    _check_fname(fname, overwrite="read", must_exist=True)
+    fname = _check_fname(fname, overwrite="read", must_exist=True)
     checks = OrderedDict()
     checks["raw"] = read_raw_fif
     checks["ica"] = read_ica

diff --git a/mne/annotations.py b/mne/annotations.py
@@ -3,7 +3,6 @@
 # Copyright the MNE-Python contributors.
 
 import json
-import os.path as op
 import re
 import warnings
 from collections import Counter, OrderedDict
@@ -1194,48 +1193,43 @@ def read_annotations(
     from .io.edf.edf import _read_annotations_edf
     from .io.eeglab.eeglab import _read_annotations_eeglab
 
-    fname = str(
-        _check_fname(
-            fname,
-            overwrite="read",
-            must_exist=True,
-            need_dir=str(fname).endswith(".ds"),  # for CTF
-            name="fname",
-        )
+    fname = _check_fname(
+        fname,
+        overwrite="read",
+        must_exist=True,
+        need_dir=str(fname).endswith(".ds"),  # for CTF
+        name="fname",
     )
-    name = op.basename(fname)
-    if name.endswith(("fif", "fif.gz")):
+    readers = {
+        ".csv": _read_annotations_csv,
+        ".cnt": _read_annotations_cnt,
+        ".ds": _read_annotations_ctf,
+        ".cef": _read_annotations_curry,
+        ".set": _read_annotations_eeglab,
+        ".edf": _read_annotations_edf,
+        ".bdf": _read_annotations_edf,
+        ".gdf": _read_annotations_edf,
+        ".vmrk": _read_annotations_brainvision,
+        ".amrk": _read_annotations_brainvision,
+        ".txt": _read_annotations_txt,
+    }
+    kwargs = {
+        ".vmrk": {"sfreq": sfreq, "ignore_marker_types": ignore_marker_types},
+        ".amrk": {"sfreq": sfreq, "ignore_marker_types": ignore_marker_types},
+        ".cef": {"sfreq": sfreq},
+        ".set": {"uint16_codec": uint16_codec},
+        ".edf": {"encoding": encoding},
+        ".bdf": {"encoding": encoding},
+        ".gdf": {"encoding": encoding},
+    }
+    if fname.suffix in readers:
+        annotations = readers[fname.suffix](fname, **kwargs.get(fname.suffix, {}))
+    elif fname.name.endswith(("fif", "fif.gz")):
         # Read FiF files
         ff, tree, _ = fiff_open(fname, preload=False)
         with ff as fid:
             annotations = _read_annotations_fif(fid, tree)
-    elif name.endswith("txt"):
-        annotations = _read_annotations_txt(fname)
-
-    elif name.endswith(("vmrk", "amrk")):
-        annotations = _read_annotations_brainvision(
-            fname, sfreq=sfreq, ignore_marker_types=ignore_marker_types
-        )
-
-    elif name.endswith("csv"):
-        annotations = _read_annotations_csv(fname)
-
-    elif name.endswith("cnt"):
-        annotations = _read_annotations_cnt(fname)
-
-    elif name.endswith("ds"):
-        annotations = _read_annotations_ctf(fname)
-
-    elif name.endswith("cef"):
-        annotations = _read_annotations_curry(fname, sfreq=sfreq)
-
-    elif name.endswith("set"):
-        annotations = _read_annotations_eeglab(fname, uint16_codec=uint16_codec)
-
-    elif name.endswith(("edf", "bdf", "gdf")):
-        annotations = _read_annotations_edf(fname, encoding=encoding)
-
-    elif name.startswith("events_") and fname.endswith("mat"):
+    elif fname.name.startswith("events_") and fname.suffix == ".mat":
         annotations = _read_brainstorm_annotations(fname)
     else:
         raise OSError(f'Unknown annotation file format "{fname}"')

diff --git a/mne/channels/montage.py b/mne/channels/montage.py
@@ -836,7 +836,7 @@ def read_dig_fif(fname):
     read_dig_localite
     make_dig_montage
     """
-    _check_fname(fname, overwrite="read", must_exist=True)
+    fname = _check_fname(fname, overwrite="read", must_exist=True)
     # Load the dig data
     f, tree = fiff_open(fname)[:2]
     with f as fid:

diff --git a/mne/chpi.py b/mne/chpi.py
@@ -306,7 +306,7 @@ def extract_chpi_locs_kit(raw, stim_channel="MISC 064", *, verbose=None):
     dtype = np.dtype([("good", "<u4"), ("data", "<f8", (4,))])
     assert dtype.itemsize == header["size"], (dtype.itemsize, header["size"])
     all_data = list()
-    for fname in raw._filenames:
+    for fname in raw.filenames:
         with open(fname) as fid:
             fid.seek(header["offset"])
             all_data.append(

diff --git a/mne/commands/tests/test_commands.py b/mne/commands/tests/test_commands.py
@@ -194,7 +194,7 @@ def test_make_scalp_surfaces(tmp_path, monkeypatch):
     dense_fname = op.join(subj_dir, "sample-head-dense.fif")
     medium_fname = op.join(subj_dir, "sample-head-medium.fif")
     with ArgvSetter(cmd, disable_stdout=False, disable_stderr=False):
-        monkeypatch.delenv("FREESURFER_HOME")
+        monkeypatch.delenv("FREESURFER_HOME", raising=False)
         with pytest.raises(RuntimeError, match="The FreeSurfer environ"):
             mne_make_scalp_surfaces.run()
         shutil.copy(op.join(surf_path, "lh.seghead"), surf_path_new)

diff --git a/mne/epochs.py b/mne/epochs.py
@@ -411,7 +411,7 @@ class BaseEpochs(
 
         .. versionadded:: 0.16
     %(drop_log)s
-    filename : str | None
+    filename : Path | None
         The filename (if the epochs are read from disk).
     %(metadata_epochs)s
 
@@ -683,7 +683,7 @@ def __init__(
             # more memory safe in most instances
             for ii, epoch in enumerate(self._data):
                 self._data[ii] = np.dot(self._projector, epoch)
-        self._filename = str(filename) if filename is not None else filename
+        self.filename = filename if filename is not None else filename
         if raw_sfreq is None:
             raw_sfreq = self.info["sfreq"]
         self._raw_sfreq = raw_sfreq
@@ -2013,10 +2013,19 @@ def apply_function(
         return self
 
     @property
-    def filename(self):
-        """The filename."""
+    def filename(self) -> Path | None:
+        """The filename if the epochs are loaded from disk.
+
+        :type: :class:`pathlib.Path` | ``None``
+        """
         return self._filename
 
+    @filename.setter
+    def filename(self, value):
+        if value is not None:
+            value = _check_fname(value, overwrite="read", must_exist=True)
+        self._filename = value
+
     def __repr__(self):
         """Build string representation."""
         s = f"{len(self.events)} events "
@@ -4266,15 +4275,15 @@ def __init__(self, fname, proj=True, preload=True, verbose=None):
                 filetype="epochs",
                 endings=("-epo.fif", "-epo.fif.gz", "_epo.fif", "_epo.fif.gz"),
             )
-            fname = str(_check_fname(fname=fname, must_exist=True, overwrite="read"))
+            fname = _check_fname(fname=fname, must_exist=True, overwrite="read")
         elif not preload:
             raise ValueError("preload must be used with file-like objects")
 
         fnames = [fname]
+        fname_rep = _get_fname_rep(fname)
         ep_list = list()
         raw = list()
         for fname in fnames:
-            fname_rep = _get_fname_rep(fname)
             logger.info(f"Reading {fname_rep} ...")
             fid, tree, _ = fiff_open(fname, preload=preload)
             next_fname = _get_next_fname(fid, fname, tree)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Improve handling of filenames in ``raw.filenames`` and ``epochs.filename`` by using :class:`~pathlib.Path` instead of :class:`str`, by `Mathieu Scheltienne`_.