MRG: Fix stats examples and wording (mne-tools#5649)
* FIX: Fix examples

* FIX: Fix wording

* FIX: Flake

* FIX: Sentence

* FIX: Bad noqa
larsoner authored Nov 3, 2018
1 parent 0f9eb55 commit af89a09
Showing 9 changed files with 83 additions and 70 deletions.
6 changes: 3 additions & 3 deletions doc/conf.py
@@ -275,16 +275,16 @@
# latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True
# latex_domain_indices = True

trim_doctests_flags = True

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
'python': ('https://docs.python.org/3', None),
'numpy': ('http://numpy.org/devdocs', None),
'numpy': ('https://www.numpy.org/devdocs', None),
'scipy': ('https://scipy.github.io/devdocs', None),
'matplotlib': ('http://matplotlib.org', None),
'matplotlib': ('https://matplotlib.org', None),
'sklearn': ('http://scikit-learn.org/stable', None),
'mayavi': ('http://docs.enthought.com/mayavi/mayavi', None),
'nibabel': ('http://nipy.org/nibabel', None),
1 change: 1 addition & 0 deletions doc/tutorial_links.inc
@@ -2,3 +2,4 @@
.. _fwer: https://en.wikipedia.org/wiki/Family-wise_error_rate
.. _fdr: https://en.wikipedia.org/wiki/False_discovery_rate
.. _ft_cluster: http://www.fieldtriptoolbox.org/faq/how_not_to_interpret_results_from_a_cluster-based_permutation_test
.. _ft_exch: https://mailman.science.ru.nl/pipermail/fieldtrip/2008-October/001794.html
61 changes: 30 additions & 31 deletions mne/stats/cluster_level.py
@@ -463,7 +463,7 @@ def _find_clusters_1dir(x, x_in, connectivity, max_step, t_power, ndimage):
sums = np.array([np.sum(x[c]) for c in clusters])
else:
sums = np.array([np.sum(np.sign(x[c]) * np.abs(x[c]) ** t_power)
for c in clusters])
for c in clusters])

return clusters, np.atleast_1d(sums)

@@ -517,8 +517,8 @@ def _setup_connectivity(connectivity, n_vertices, n_times):
# we claim to only use upper triangular part... not true here
connectivity = (connectivity + connectivity.transpose()).tocsr()
connectivity = [connectivity.indices[connectivity.indptr[i]:
connectivity.indptr[i + 1]] for i in
range(len(connectivity.indptr) - 1)]
connectivity.indptr[i + 1]]
for i in range(len(connectivity.indptr) - 1)]
return connectivity


@@ -878,12 +878,12 @@ def _permutation_cluster_test(X, threshold, n_permutations, tail, stat_fun,
this_include = step_down_include
logger.info('Permuting %d times%s...' % (len(orders), extra))
with ProgressBar(len(orders), verbose_bool='auto') as progress_bar:
H0 = parallel(my_do_perm_func(X_full, slices, threshold, tail,
connectivity, stat_fun, max_step, this_include,
partitions, t_power, order, sample_shape,
buffer_size, progress_bar.subset(idx))
for idx, order in split_list(
orders, n_jobs, idx=True))
H0 = parallel(
my_do_perm_func(X_full, slices, threshold, tail, connectivity,
stat_fun, max_step, this_include, partitions,
t_power, order, sample_shape, buffer_size,
progress_bar.subset(idx))
for idx, order in split_list(orders, n_jobs, idx=True))
# include original (true) ordering
if tail == -1: # up tail
orig = cluster_stats.min()
@@ -1078,19 +1078,14 @@ def permutation_cluster_1samp_test(
connectivity=None, verbose=None, n_jobs=1, seed=None, max_step=1,
exclude=None, step_down_p=0, t_power=1, out_type='mask',
check_disjoint=False, buffer_size=1000):
"""Non-parametric cluster-level 1 sample t-test.
From a array of observations, e.g. signal amplitudes or power spectrum
estimates etc., calculate if the observed mean significantly deviates
from 0. The procedure uses a cluster analysis with permutation test
for calculating corrected p-values. Randomized data are generated with
random sign flips. See [1]_ for more information.
"""Non-parametric cluster-level paired t-test.
Parameters
----------
X : array, shape=(n_samples, p[, q])
Array where the first dimension corresponds to the
samples (observations). X[k] can be a 1D or 2D array (time series
difference in paired samples (observations) in two conditions.
``X[k]`` can be a 1D or 2D array (time series
or TF image) associated to the kth observation.
threshold : float | dict | None
If threshold is None, it will choose a t-threshold equivalent to
@@ -1177,6 +1172,20 @@ def permutation_cluster_1samp_test(
Notes
-----
From an array of paired observations, e.g. a difference in signal
amplitudes or power spectra in two conditions, calculate if the data
distributions in the two conditions are significantly different.
The procedure uses a cluster analysis with permutation test
for calculating corrected p-values. Randomized data are generated with
random sign flips. See [1]_ for more information.
Because a 1-sample t-test on the difference in observations is
mathematically equivalent to a paired t-test, internally this function
computes a 1-sample t-test (by default) and uses sign flipping (always)
to perform permutations. This might not be suitable for the case where
there is truly a single observation under test; see
:ref:`sphx_glr_auto_tutorials_plot_background_statistics.py`.
If ``n_permutations >= 2 ** (n_samples - (tail == 0))``,
``n_permutations`` and ``seed`` will be ignored since an exact test
(full permutation test) will be performed.
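
As a rough illustration of these notes, here is a minimal sketch of calling ``mne.stats.permutation_cluster_1samp_test`` on the difference between two conditions; the shapes, variable names, and parameter values below are assumptions chosen for the example, not taken from the commit.

# Hedged sketch (illustrative only): paired cluster test via sign flips.
import numpy as np
from mne.stats import permutation_cluster_1samp_test

rng = np.random.RandomState(0)
n_subjects, n_times = 12, 50                   # assumed toy sizes
cond_a = rng.randn(n_subjects, n_times)
cond_b = rng.randn(n_subjects, n_times) + 0.5  # simulated paired effect
X = cond_a - cond_b  # paired t-test == 1-sample t-test on the difference

# With 12 subjects and tail=0 there are 2 ** (12 - 1) = 2048 unique sign
# flips, so n_permutations=1024 keeps this a random (not exact) test.
T_obs, clusters, cluster_pv, H0 = permutation_cluster_1samp_test(
    X, threshold=None, n_permutations=1024, tail=0, seed=0)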
@@ -1207,17 +1216,17 @@ def spatio_temporal_cluster_1samp_test(
max_step=1, spatial_exclude=None, step_down_p=0, t_power=1,
out_type='indices', check_disjoint=False, buffer_size=1000,
verbose=None):
"""Non-parametric cluster-level 1 sample t-test for spatio-temporal data.
"""Non-parametric cluster-level paired t-test for spatio-temporal data.
This function provides a convenient wrapper for data organized in the form
(observations x time x space) to use
:func:`mne.stats.permutation_cluster_1samp_test`. See [1]_ for more
information.
:func:`mne.stats.permutation_cluster_1samp_test`, which contains more
complete documentation.
Parameters
----------
X : array, shape (n_observations, n_times, n_vertices)
Array data.
Array data of the difference between two conditions.
threshold : float | dict | None
If threshold is None, it will choose a t-threshold equivalent to
p < 0.05 for the given number of observations (only valid when
@@ -1298,16 +1307,6 @@ def spatio_temporal_cluster_1samp_test(
H0 : array, shape (n_permutations,)
Max cluster level stats observed under permutation.
Notes
-----
If ``n_permutations >= 2 ** (n_samples - (tail == 0))``,
``n_permutations`` and ``seed`` will be ignored since an exact test
(full permutation test) will be performed.
If no initial clusters are found, i.e., all points in the true
distribution are below the threshold, then ``clusters``, ``cluster_pv``,
and ``H0`` will all be empty arrays.
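
For orientation, a minimal sketch of this spatio-temporal wrapper on toy data; the chain-shaped connectivity matrix and all sizes are assumptions for illustration, whereas real source-space data would typically use a connectivity computed from the source space (e.g. ``mne.spatial_src_connectivity(src)``).

# Hedged sketch (illustrative only): cluster test on (observations, times, vertices).
import numpy as np
from scipy import sparse
from mne.stats import spatio_temporal_cluster_1samp_test

rng = np.random.RandomState(0)
n_subjects, n_times, n_vertices = 10, 20, 30    # assumed toy sizes
X = rng.randn(n_subjects, n_times, n_vertices)  # e.g. condition A minus B

# Toy spatial connectivity: each "vertex" is a neighbor of the next one.
idx = np.arange(n_vertices - 1)
connectivity = sparse.coo_matrix(
    (np.ones(n_vertices - 1), (idx, idx + 1)),
    shape=(n_vertices, n_vertices))

T_obs, clusters, cluster_pv, H0 = spatio_temporal_cluster_1samp_test(
    X, connectivity=connectivity, n_permutations=256, seed=0)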
References
----------
.. [1] Maris/Oostenveld, "Nonparametric statistical testing of
19 changes: 12 additions & 7 deletions mne/viz/topomap.py
@@ -398,7 +398,7 @@ def _check_outlines(pos, outlines, head_pos=None):
nose_x = np.array([0.18, 0, -0.18]) * radius
nose_y = np.array([radius - .004, radius * 1.15, radius - .004])
ear_x = np.array([.497, .510, .518, .5299, .5419, .54, .547,
.532, .510, .489])
.532, .510, .489])
ear_y = np.array([.0555, .0775, .0783, .0746, .0555, -.0055, -.0932,
-.1313, -.1384, -.1199])

@@ -1567,7 +1567,8 @@ def plot_evoked_topomap(evoked, times="auto", ch_type=None, layout=None,
top=1 - top_frame)
# find first index that's >= (to rounding error) to each time point
time_idx = [np.where(_time_mask(evoked.times, tmin=t,
tmax=None, sfreq=evoked.info['sfreq']))[0][0]
tmax=None,
sfreq=evoked.info['sfreq']))[0][0]
for t in times]

if average is None:
@@ -2392,9 +2393,9 @@ def _plot_corrmap(data, subjs, indices, ch_type, ica, label, show, outlines,
if len(picks) > p: # plot components by sets of 20
n_components = len(picks)
figs = [_plot_corrmap(data[k:k + p], subjs[k:k + p],
indices[k:k + p], ch_type, ica, label, show,
outlines=outlines, layout=layout, cmap=cmap,
contours=contours)
indices[k:k + p], ch_type, ica, label, show,
outlines=outlines, layout=layout, cmap=cmap,
contours=contours)
for k in range(0, n_components, p)]
return figs
elif np.isscalar(picks):
@@ -2505,8 +2506,8 @@ def plot_arrowmap(data, info_from, info_to=None, scale=1e-10, vmin=None,
Additional plotting parameters for plotting significant sensors.
Default (None) equals::
dict(marker='o', markerfacecolor='w', markeredgecolor='k',
linewidth=0, markersize=4)
dict(marker='o', markerfacecolor='w', markeredgecolor='k',
linewidth=0, markersize=4)
outlines : 'head' | 'skirt' | dict | None
The outlines to be drawn. If 'head', the default head scheme will be
@@ -2544,6 +2545,10 @@ def plot_arrowmap(data, info_from, info_to=None, scale=1e-10, vmin=None,
fig : matplotlib.figure.Figure
The Figure of the plot
Notes
-----
.. versionadded:: 0.17
References
----------
.. [1] D. Cohen, H. Hosaka
47 changes: 28 additions & 19 deletions tutorials/plot_background_statistics.py
@@ -47,6 +47,7 @@
# [1]_ and construct a toy dataset consisting of a 40 x 40 square with a
# "signal" present in the center with white noise added and a Gaussian
# smoothing kernel applied.

width = 40
n_subjects = 10
signal_mean = 100
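
A hedged sketch of how such a toy dataset could be built; it reuses ``width``, ``n_subjects``, and ``signal_mean`` from the lines just above, while the noise level, signal extent, and smoothing width are assumptions for illustration.

# Hedged sketch of the kind of toy data described above (values assumed).
import numpy as np
from scipy.ndimage import gaussian_filter

rng = np.random.RandomState(0)
noise_sd = 5.    # assumed noise standard deviation
sigma = 1.5      # assumed Gaussian smoothing width (pixels)
X = noise_sd * rng.randn(n_subjects, width, width)
# add the "signal": a square region in the center of each image
X[:, width // 4:3 * width // 4, width // 4:3 * width // 4] += signal_mean
# smooth each subject's image with the Gaussian kernel
X = np.array([gaussian_filter(x, sigma) for x in X])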
@@ -99,14 +100,16 @@
#
# Parametric tests
# ^^^^^^^^^^^^^^^^
# Let's start with a **1-sample t-test**, which is a standard test
# for differences in paired sample means. This test is **parametric**,
# Let's start with a **paired t-test**, which is a standard test
# for differences in paired samples. Mathematically, it is equivalent
# to a 1-sample t-test on the difference between the samples in each condition.
# The paired t-test is **parametric**
# because it assumes that the underlying sample distribution is Gaussian, and
# is only valid in this case. This happens to be satisfied by our toy dataset,
# but is not always satisfied for neuroimaging data.
#
# In the context of our toy dataset, which has many voxels
# (:math:`40 \cdot 40 = 1600`), applying the 1-sample t-test is called a
# (:math:`40 \cdot 40 = 1600`), applying the paired t-test is called a
# *mass-univariate* approach as it treats each voxel independently.

titles = ['t']
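
The equivalence mentioned above is easy to verify directly; a small sketch with assumed toy data:

# Hedged illustration: paired t-test equals 1-sample t-test on the difference.
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
a = rng.randn(10)        # condition A, one value per subject
b = rng.randn(10) + 0.3  # condition B, paired with A
t_paired, p_paired = stats.ttest_rel(a, b)
t_1samp, p_1samp = stats.ttest_1samp(a - b, 0)
assert np.isclose(t_paired, t_1samp) and np.isclose(p_paired, p_1samp)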
@@ -181,29 +184,35 @@ def plot_t_p(t, p, title, mcc, axes=None):
# Non-parametric tests
# ^^^^^^^^^^^^^^^^^^^^
# Instead of assuming an underlying Gaussian distribution, we could instead
# use a **non-parametric resampling** method. Under the null hypothesis,
# we have the principle of **exchangeability**, which means that, if the null
# is true, we should be able to exchange conditions and not change the
# distribution of the test statistic.
#
# In the case of a two-tailed 1-sample t-test (which can also be used to test
# the difference between two conditions against zero), exchangeability means
# that we can flip the signs of our data. Therefore, we can construct the
# use a **non-parametric resampling** method. In the case of a paired t-test
# between two conditions A and B, which is mathematically equivalent to a
# one-sample t-test between the difference in the conditions A-B, under the
# null hypothesis we have the principle of **exchangeability**. This means
# that, if the null is true, we can exchange conditions and not change
# the distribution of the test statistic.
#
# When using a paired t-test, exchangeability thus means that we can flip the
# signs of the difference between A and B. Therefore, we can construct the
# **null distribution** values for each voxel by taking random subsets of
# samples (subjects), flipping the sign of their data, and recording the
# samples (subjects), flipping the sign of their difference, and recording the
# absolute value of the resulting statistic (we record the absolute value
# because we conduct a two-tailed test). The absolute value of the statistic
# evaluated on the veridical data can then be compared to this distribution,
# and the p-value is simply the proportion of null distribution values that
# are at least as large.
#
# .. note:: In the case where ``n_permutations`` is large enough (or "all") so
# that the complete set of unique resampling exchanges can be done
# (which is :math:`2^{N_{samp}}-1` for a one-tailed and
# :math:`2^{N_{samp}-1}-1` for a two-tailed test, not counting the
# veridical distribution), instead of randomly exchanging conditions
# the null is formed from using all possible exchanges. This is known
# as a permutation test (or exact test).
# .. warning:: In the case of a true one-sample t-test, i.e. analyzing a single
# condition rather than the difference between two conditions,
# it is not clear where/how exchangeability applies; see
# `this FieldTrip discussion <ft_exch_>`_.
#
# In the case where ``n_permutations`` is large enough (or "all") so
# that the complete set of unique resampling exchanges can be done
# (which is :math:`2^{N_{samp}}-1` for a one-tailed and
# :math:`2^{N_{samp}-1}-1` for a two-tailed test, not counting the
# veridical distribution), instead of randomly exchanging conditions
# the null is formed from using all possible exchanges. This is known
# as a permutation test (or exact test).
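
A small sketch of the sign-flip resampling just described, on assumed toy data and without any cluster or multiple-comparison machinery:

# Hedged sketch: per-voxel null distribution from random sign flips (two-tailed).
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
data = rng.randn(10, 5)                # 10 subjects x 5 voxels of A-B differences
t_obs = stats.ttest_1samp(data, 0)[0]  # observed t-value per voxel

n_permutations = 500
h0 = np.zeros((n_permutations, data.shape[1]))
for k in range(n_permutations):
    flips = rng.choice([-1, 1], size=(data.shape[0], 1))  # one sign per subject
    h0[k] = np.abs(stats.ttest_1samp(flips * data, 0)[0])

# two-tailed p-value per voxel: proportion of null |t| values at least as large
p = (h0 >= np.abs(t_obs)).mean(axis=0)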

# Here we have to do a bit of gymnastics to get our function to do
# a permutation test without correcting for multiple comparisons:
6 changes: 3 additions & 3 deletions tutorials/plot_stats_cluster_spatio_temporal.py
@@ -4,10 +4,10 @@
=================================================================
Tests if the evoked response is significantly different between
conditions across subjects (simulated here using one subject's data).
two conditions across subjects. Here just for demonstration purposes
we simulate data from multiple subjects using one subject's data.
The multiple comparisons problem is addressed with a cluster-level
permutation test across space and time.
"""
# Authors: Alexandre Gramfort <[email protected]>
# Eric Larson <[email protected]>
@@ -186,5 +186,5 @@
brain = stc_all_cluster_vis.plot(
hemi='both', views='lateral', subjects_dir=subjects_dir,
time_label='Duration significant (ms)', size=(800, 800),
smoothing_steps=5)
smoothing_steps=5, clim=dict(kind='value', pos_lims=[0, 1, 40]))
# brain.save_image('clusters.png')
5 changes: 3 additions & 2 deletions tutorials/plot_stats_cluster_spatio_temporal_2samp.py
@@ -107,7 +107,8 @@
# shows all the clusters, weighted by duration
subjects_dir = op.join(data_path, 'subjects')
# blue blobs are for condition A != condition B
brain = stc_all_cluster_vis.plot('fsaverage', hemi='both', colormap='mne',
brain = stc_all_cluster_vis.plot('fsaverage', hemi='both',
views='lateral', subjects_dir=subjects_dir,
time_label='Duration significant (ms)')
time_label='Duration significant (ms)',
clim=dict(kind='value', lims=[0, 1, 40]))
brain.save_image('clusters.png')
@@ -238,9 +238,9 @@ def stat_fun(*args):
# The brighter the color, the stronger the interaction between
# stimulus modality and stimulus location

brain = stc_all_cluster_vis.plot(subjects_dir=subjects_dir, colormap='mne',
views='lateral',
time_label='Duration significant (ms)')
brain = stc_all_cluster_vis.plot(subjects_dir=subjects_dir, views='lat',
time_label='Duration significant (ms)',
clim=dict(kind='value', lims=[0, 1, 40]))
brain.save_image('cluster-lh.png')
brain.show_view('medial')

2 changes: 0 additions & 2 deletions tutorials/plot_stats_cluster_time_frequency.py
@@ -1,6 +1,4 @@
"""
.. _tut_stats_cluster_sensor_2samp_tfr:
=========================================================================
Non-parametric between conditions cluster statistic on single trial power
=========================================================================
