forked from LeonDLotter/MAsync
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoverlap.py
267 lines (230 loc) · 11.7 KB
/
overlap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 18 12:24:59 2021
@author: Leon D. Lotter
"""
import nilearn as nl
from nilearn.image import resample_to_img, math_img, load_img, new_img_like
from nilearn.glm import threshold_stats_img
from nimare.dataset import Dataset
from nimare.meta.cbma.ale import ALE
from nimare.stats import null_to_p
from nimare.transforms import p_to_z
from joblib import Parallel, delayed
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from .utils_image import get_cluster_stats
import logging
lgr = logging.getLogger(__name__)
lgr.setLevel(logging.INFO)
def rel_abs_distributions(roi_vols, target_vol,
roi_labs=None, target_labs=None):
"""
Quantifies overlap of a list of ROIs with, e.g., a given target network
parcellation.
Relative distribution = proportion of 'roi_vols' voxels within a given
'target_vol' cluster vs. all 'roi_vols' voxels.
Absolute distribution = proportion of 'roi_vols'-voxels within a given
'target_vol' cluster vs. all voxels within this 'target_vol' cluster.
Input:
roi_vols = list of roi volumes
roi_labs = list of roi names for output table
target_vol = target volume with clusters/parcels indexed from 1 to n
target_lab = list of target cluster/parcel names for output table
All volumes are resampled to space and voxelsize of the first roi.
Output:
pandas dataframe with relative and absolute distributions of each ROI
within each cluster of the target volume.
"""
# resample target volume to space of first roi volume
t_vol = resample_to_img(target_vol, roi_vols)
t_mat = t_vol.get_fdata().round() # get target data matrix
t_idc = np.unique(t_mat)[1:].astype(int) # get cluster indices
# target labels
if target_labs is None:
target_labs = [f'target{i}' for i in t_idc]
if len(t_idc) != len(target_labs):
lgr.error(f'Number of target clusters/parcels ({len(t_idc)}) does not '
f'match number of given labels ({len(target_labs)})!')
# roi labels
if roi_labs is None:
roi_labs = [f'roi{i}' for i in range(0, len(roi_vols))]
if len(roi_vols) != len(roi_labs):
lgr.error(f'Number of ROI volumes ({len(roi_vols)}) does not '
f'match number of given labels ({len(roi_labs)})!')
# Results table with target cluster names and total voxel numbers per cluster
res = pd.DataFrame({'targetNr': t_idc,
'targetName': target_labs,
'nvox_target': [len(t_mat[t_mat == idx]) for idx in t_idc]})
# Iterate over ROI list
res_out = []
for roi, roi_lab in zip(roi_vols, roi_labs):
r_vol = resample_to_img(roi, roi_vols[0]) # resample to first roi space
r_bin = math_img('img > 0', img=r_vol) # binarize, just in case
r_mat = r_bin.get_fdata() # get roi data matrix
rt_mat = r_mat * t_mat # intersect roi with target matrix
# RESULTS
res_roi = res.copy()
# ROI name
res_roi['roiName'] = roi_lab
# voxels in ROI
res_roi['nvox_roi'] = len(r_mat[r_mat == 1])
# overlapping voxels per target cluster
res_roi['nvox_overlap'] = [len(rt_mat[rt_mat == nr]) for nr in res_roi.targetNr]
# distributions
res_roi['relDistr'] = res_roi.nvox_overlap / res_roi.nvox_roi # relative
res_roi['absDistr'] = res_roi.nvox_overlap / res_roi.nvox_target # absolute
# save to list
res_out.append(res_roi)
res_df = pd.concat(res_out).reset_index()
res_df = res_df[['roiName', 'nvox_roi', 'targetNr', 'targetName',
'nvox_target', 'nvox_overlap', 'relDistr', 'absDistr']]
return(res_df)
#=============================================================================
def distribution_null_test(ds, target_vol, roi_vol=None,
vox_thresh=0.001, cluster_thresh=25, thresh_type="size",
n_perm=1000, seed=None, n_proc=-1,
target_labs=None, template=None):
"""
Assignes permutation-based p-values to the overlap between meta-analytically derived ROIs with,
e.g., a given target network parcellation.
Relative distribution = proportion of 'roi_vols' voxels within a given
'target_vol' cluster vs. all 'roi_vols' voxels.
Absolute distribution = proportion of 'roi_vols'-voxels within a given
'target_vol' cluster vs. all voxels within this 'target_vol' cluster.
If the ROI volume {roi_vol} is None, a true ROI map is calculated from the input dataset {ds}.
The 'true' distributions across the target ROIs {target_vol} are calculated. Based on the
numbers of subjects and foci in {ds}, {n_perm} null datasets are randomly created. On each of
these datasets, an ALE ROI map is calculated using the thresholding settings {vox_thresh},
{cluster_thresh} and {thresh_type}. Relative/Absolute distributions are calculated for each
of these null ROI maps and the resulting null distributions are compared to the true values
to estimate positive-sided p-values for each ROI in {target_vol}.
Input:
ds = NiMARE dataset
target_vol = target volume with clusters/parcels indexed from 1 to n
roi_vol = input volume. If None, will be generated from NiMARE dataset
vox_thresh = voxel threshold as p-value
cluster_thresh = cluster mass or size threshold, depending on {thresh_type}
thresh_type = 'mass' or 'size'
n_perm = number of iterations/ null datasets/ maps
seed = seed for reproducability
n_proc = number of parallel processes, -1 = number of CPU cores
target_labs = list of target cluster/parcel names for output table
template = template (i.e. grey matter) from which to sample random foci coordinates.
if None, will use nilearn MNI-152 GM TPM thresholded at > 0.2
Output:
- pandas dataframe with p-values corresponding to relative and absolute distributions
within each cluster of the target volume.
- dict with {n_perm} pandas dataframes as resulting from the rel_abs_distributions method
"""
def thresh_img(img_stat, img_z):
# voxel threshold
img_vthresh = math_img(
f"stat * (np.abs(z) > {p_to_z(vox_thresh, tail='one')})",
stat=img_stat,
z=img_z)
# cluster threshold
img_labels, clust_labels, clust_sizes, clust_masses = get_cluster_stats(img_vthresh)
if thresh_type=="size":
clust_labels = clust_labels[clust_sizes > cluster_thresh]
elif thresh_type=="mass":
clust_labels = clust_labels[clust_masses > cluster_thresh]
img_cthresh = np.zeros(img_vthresh.shape)
for c in clust_labels:
img_cthresh[img_labels==c] = 1
# return new image
return new_img_like(img_vthresh, img_cthresh)
# run real ALE
lgr.info(f"Estimating 'true' results. Thresholding: voxel p < {vox_thresh}, cluster {thresh_type} > {cluster_thresh}.")
fit_ale = ALE()
if roi_vol is None:
ale_true = fit_ale.fit(ds)
# apply thresholds
roi_vol = thresh_img(img_stat=ale_true.get_map("stat"), img_z=ale_true.get_map("z"))
# get real distributions
distr_true = rel_abs_distributions(roi_vols=[roi_vol],
target_vol=load_img(target_vol),
target_labs=target_labs)
# number of experiments, foci, and subjects for null experiments
np.random.seed = seed
n_exp_true = len(ds.ids)
n_coord_true = [len(ds.coordinates.x[ds.coordinates.id==id]) for id in ds.ids]
n_sub_true = [n[0] for n in ds.get_metadata(field='sample_sizes')]
# Get coordinates for null experiments
# If no template given, fetch MNI152 GM mask and resample to target_vol space
# produces warning: "pixdim[0] (qfac) should be 1 or -1; setting qfac to 1",
# the result is okay, however.
if template is None:
template_1mm = nl.datasets.fetch_icbm152_brain_gm_mask(threshold=0.2)
template = nl.image.resample_to_img(template_1mm, target_vol,
interpolation='nearest')
# Get possible MNI coordinates for null studies = all voxels in template
dat = template.get_fdata() # get matrix
x, y, z = np.where(dat == 1.0) # get GM coordinates
affine = template.affine # get affine matrix for MNI transform
# get mni coordinates
mni_all = nl.image.coord_transform(x=x, y=y, z=z, affine=affine)
mni_all = np.array(mni_all).T
# Function to generate null datasets
def null_datasets(i):
# number of
n_coord_null = np.random.choice(n_coord_true, n_exp_true)
n_sub_null = np.random.choice(n_sub_true, n_exp_true)
# generate null experiments
exp_null_dict = {} # empty dict to store null study-wise results
for ii in range(n_exp_true):
study_id = f'null_{ii+1:03}' # study id
coords_list = mni_all[np.random.choice(mni_all.shape[0], n_coord_null[ii])] # pick random foci
coords = np.vstack(coords_list) # make coordinates array
# write into dict as required by NiMARE
exp_null_dict[study_id] = {
'contrasts': { '1': {
'coords': {
'space': 'MNI',
'x': list(coords[:, 0]),
'y': list(coords[:, 1]),
'z': list(coords[:, 2]),
},
'metadata': {
'sample_sizes': n_sub_null[ii]
}}}}
# convert dict to NiMARE dataset
ds_null = Dataset(exp_null_dict)
ds_null.coordinates['space'] = 'MNI' # avoids error when merging datasets
# return
return ds_null
# Generate null data
lgr.info(f"Generating null datasets (n = {n_perm}).")
ds_null = Parallel(n_jobs=n_proc)(delayed(null_datasets)(i) for i in tqdm(range(n_perm)))
ds_null = dict(zip(list(range(n_perm)), ds_null))
# Function to estimate ALE, calculate distributions and return result
def null_ale(ds_null):
# perform ALE
ale_null = fit_ale.fit(ds_null)
ale_null_thresh = thresh_img(img_stat=ale_null.get_map("stat"), img_z=ale_null.get_map("z"))
if (ale_null_thresh.get_fdata() > 0).any():
# get distributions
distr_null = rel_abs_distributions(roi_vols=[ale_null_thresh],
target_vol=load_img(target_vol))
else:
distr_null = None
# return
return distr_null
# Get distributions of null volumes across target volumes
lgr.info(f"Performing null meta-analyses and estimating distributions across target volume.")
distr_null = Parallel(n_jobs=n_proc)(delayed(null_ale)(ds_null[i]) for i in tqdm(range(n_perm)))
distr_null = dict(zip(list(range(n_perm)), distr_null))
# collect distribution data
lgr.info("Estimating p-values.")
p_rel, p_abs = dict(), dict()
for i_target, target in enumerate(distr_true.targetName):
data_null = np.zeros((n_perm,2))
for i in range(n_perm):
if distr_null[i] is not None:
data_null[i,:] = distr_null[i].loc[i_target, ["relDistr", "absDistr"]]
p_rel[target] = null_to_p(distr_true["relDistr"][i_target], data_null[:,0], tail="upper")
p_abs[target] = null_to_p(distr_true["absDistr"][i_target], data_null[:,1], tail="upper")
# return
return pd.DataFrame([p_rel, p_abs], index=["relDistr", "absDistr"]).T, distr_null