-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnetgen.py
223 lines (188 loc) · 6.37 KB
/
netgen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# -*- coding: utf-8 -*-
"""
This module contains functions for thresholding matrices
and outputting links/networks.
"""
import numpy as np
import igraph
import dataio
def get_graph_from_bare_data(corr_mat_fname, blacklist_fname, density,
include_mst=False, weighted=False):
"""
Extracts a graph from raw data.
Parameters
----------
corr_mat_fname : str
path to the file containing the correlation matrix.
blacklist_fname : str
path to the bool blacklist
density : float
the network density to use
include_mst : bool
whether to include the maximum spanning tree
weighted : bool
whether to consider the network as weighted
Returns
-------
net : igraph.Graph
the network
"""
corr_mat = dataio.load_adj_matrix_from_mat(corr_mat_fname)
ok_nodes = dataio.get_ok_nodes(blacklist_fname)
net = make_net_from_unfiltered_data(
corr_mat,
ok_nodes,
density,
include_mst=include_mst,
weighted=weighted)
return net
def _get_filtered_triu_adj_mat_copy(matrix, ok_nodes):
"""
Takes only the nodes listed in ok_nodes into account.
Parameters
----------
matrix : np.array
2D matrix with bad nodes
ok_nodes : numpy bool array
Returns
-------
m : np.array
a copy of the matrix where the bad nodes have been removed
"""
m = matrix.copy()
m = m[ok_nodes, :]
m = m[:, ok_nodes]
return np.triu(m, 1)
def make_net_from_unfiltered_data(corr_mat, ok_nodes, density, include_mst=False,
weighted=False):
"""
Constructs a net from unfiltered data.
Parameters
----------
corr_mat : np.array
2D numpy array with bad nodes.
ok_nodes : np.array
the bool blacklist (whitelist)
density : float
the network density to use
include_mst : bool
whether to include the maximum spanning tree
weighted : bool
whether to consider the network as weighted
Returns
-------
net : igraph.Graph
"""
assert 0 <= density <= 1
edgelist = sort_links_by_weight(corr_mat, ok_nodes, include_mst)
nNodes = sum(ok_nodes)
nLinksMax = (nNodes * (nNodes - 1)) / 2
nLinks = int(nLinksMax * density)
edgelist = edgelist[:nLinks]
return make_net(edgelist, nNodes, weighted)
def get_treshold_value(corr_mat, ok_nodes, density, include_mst=False):
"""
Constructs a net from unfiltered data.
Parameters
----------
corr_mat : np.array
2D numpy array with bad nodes.
ok_nodes : np.array
the bool blacklist (whitelist)
density : float
the network density to use
include_mst : bool
whether to include the maximum spanning tree
Returns
-------
threshold: float
the weight corresponding to the last considered link
(i.e. no threshold)
"""
assert 0 <= density <= 1
edgelist = sort_links_by_weight(corr_mat, ok_nodes, include_mst)
n_nodes = sum(ok_nodes)
n_links_max = (n_nodes * (n_nodes - 1)) / 2
n_links = int(n_links_max * density)
return edgelist[n_links]['weight']
def make_net(edgelist, nNodes, weighted):
'''
Create the network given the edgelist and number of nodes
Parameters
----------
weighted : (boolean)
Whether weights are to be considered or not
'''
graph = igraph.Graph(nNodes)
graph.add_edges(zip(edgelist['node1'], edgelist['node2']))
if weighted is True:
# graph.es['weight'] = 1
graph.es['weight'] = edgelist['weight']
# for n1, n2, w in edgelist:
# graph[n1, n2] = w
return graph
def make_full_weighted_net_from_weight_mat(matrix, ok_nodes, return_weights=False):
"""
Takes in an adjacency/correlation matrix, and constructs an undirected
weighted network
"""
nNodes = np.sum(ok_nodes)
graph = igraph.Graph(nNodes)
triu_indices = np.triu_indices_from(matrix, 1)
edgelist = np.array(triu_indices).T
graph.add_edges(edgelist)
weights = matrix[triu_indices]
graph.es["weight"] = weights
if return_weights:
return graph, weights
return graph
def sort_links_by_weight(corr_mat, ok_nodes, include_mst):
"""
Sort the links by their link-weight
Parameters
----------
corr_mat : np.array
2D numpy array with bad nodes.
ok_nodes : np.array
the bool blacklist (whitelist)
include_mst : Bool
If true add the maximum spanning tree to the begining of sorted list
Returns
-------
edgelist : numpy structrued array (node1, node2, weight)
array([(0, 1, 1.0), (0, 3, 0.5), (2, 3, 0.5), (0, 4, 0.7), (1, 4, 0.4)],
dtype=[('node1', '<i4'), ('node2', '<i4'), ('weight', '<f8')])
"""
up_diag_matrix = _get_filtered_triu_adj_mat_copy(corr_mat, ok_nodes)
n = len(up_diag_matrix)
minVal = np.min(up_diag_matrix)
minValMinusOne = np.min(up_diag_matrix) - 1
# So that possible overflows don't go unnoticed
assert minValMinusOne < minVal
initEdges = np.array(np.triu_indices_from(up_diag_matrix, 1)).T
weights = up_diag_matrix[np.triu_indices_from(up_diag_matrix, 1)]
nLinksMax = (n * (n - 1)) / 2
nLinksMST = 0
edgelist = np.zeros(
nLinksMax, dtype=[('node1', 'i4'), ('node2', 'i4'), ('weight', 'f8')])
# Get the maximum spanning tree (Multyplying the weights by -1 does the
# trick)
if include_mst:
g = igraph.Graph(n, list(initEdges), directed=False)
mst = g.spanning_tree(-1 * weights, return_tree=False)
for i, ei in enumerate(mst):
edge = g.es[ei]
edgelist[i] = edge.source, edge.target, weights[ei]
# Take these links away from the orig. mat
up_diag_matrix[edge.source, edge.target] = minValMinusOne
nLinksMST = len(mst)
# How many links we still need to take after (possible) MST:
nLinksYetToTake = np.max([nLinksMax - nLinksMST, 0]) # mst already there
# Get the next largest indices
up_diag_matrix[np.tril_indices_from(up_diag_matrix, 0)] = minValMinusOne
mflat = up_diag_matrix.flatten()
flatindices = mflat.argsort()[::-1][:nLinksYetToTake]
edgelist[nLinksMST:]['node1'], edgelist[nLinksMST:][
'node2'] = np.unravel_index(flatindices, (n, n))
edgelist[nLinksMST:]['weight'] = mflat[flatindices]
return edgelist