|
| 1 | + |
| 2 | +import networkx as nx |
| 3 | + |
| 4 | + |
# Compatibility layer: expose ``from_pandas_edgelist`` and
# ``set_node_attributes`` with the NetworkX 2.x call signatures regardless of
# the installed NetworkX release.
#
# The major version is compared as an integer: comparing the raw version
# strings is lexicographic and would mis-order a multi-digit major version
# (``'10.0' >= '2'`` is False).
if int(nx.__version__.split('.')[0]) >= 2:
    from_pandas_edgelist = nx.from_pandas_edgelist
    set_node_attributes = nx.set_node_attributes

else:  # pragma: no cover
    from networkx.convert import _prep_create_using

    # This code is slightly modified from the source code for NetworkX
    # version 2.0.

    def from_pandas_edgelist(df, source='source', target='target',
                             edge_attr=None, create_using=None):
        """Return a graph from Pandas DataFrame containing an edge list.

        The Pandas DataFrame should contain at least two columns of node
        names and zero or more columns of edge attributes. Each row will be
        processed as one edge instance.

        Note: This function iterates over DataFrame.values, which is not
        guaranteed to retain the data type across columns in the row. This
        is only a problem if your row is entirely numeric and a mix of ints
        and floats. In that case, all values will be returned as floats. See
        the DataFrame.iterrows documentation for an example.

        Parameters
        ----------
        df : Pandas DataFrame
            An edge list representation of a graph

        source : str or int
            A valid column name (string or integer) for the source nodes
            (for the directed case).

        target : str or int
            A valid column name (string or integer) for the target nodes
            (for the directed case).

        edge_attr : str or int, iterable, True
            A valid column name (str or integer) or list of column names
            that will be used to retrieve items from the row and add them to
            the graph as edge attributes. If `True`, all of the remaining
            columns will be added.

        create_using : NetworkX graph
            Use specified graph for result. The default is Graph()

        Returns
        -------
        g : NetworkX graph
            The graph produced by ``_prep_create_using(create_using)`` with
            one edge added per DataFrame row.

        See Also
        --------
        networkx.from_pandas_edgelist : used instead of this fallback when
            NetworkX >= 2 is installed.

        Examples
        --------
        Simple integer weights on edges:

        >>> import pandas as pd
        >>> import numpy as np
        >>> r = np.random.RandomState(seed=5)
        >>> ints = r.random_integers(1, 10, size=(3,2))
        >>> a = ['A', 'B', 'C']
        >>> b = ['D', 'A', 'E']
        >>> df = pd.DataFrame(ints, columns=['weight', 'cost'])
        >>> df[0] = a
        >>> df['b'] = b
        >>> df
           weight  cost  0  b
        0       4     7  A  D
        1       7     1  B  A
        2      10     9  C  E
        >>> G = from_pandas_edgelist(df, 0, 'b', ['weight', 'cost'])
        >>> G['E']['C']['weight']
        10
        >>> G['E']['C']['cost']
        9
        >>> edges = pd.DataFrame({'source': [0, 1, 2],
        ...                       'target': [2, 2, 3],
        ...                       'weight': [3, 4, 5],
        ...                       'color': ['red', 'blue', 'blue']})
        >>> G = from_pandas_edgelist(edges, edge_attr=True)
        >>> G[0][2]['color']
        'red'

        """
        g = _prep_create_using(create_using)

        # Positional index of the source and target columns within a row.
        src_i = df.columns.get_loc(source)
        tar_i = df.columns.get_loc(target)

        # If no attribute columns are requested, just add the edges.
        if not edge_attr:
            for row in df.values:
                g.add_edge(row[src_i], row[tar_i])
            return g

        # Build a list of (attribute name, column index) pairs.
        if edge_attr is True:
            # All remaining columns. Labels are compared by equality, not
            # identity, so an equal-but-distinct label object (e.g. a
            # non-interned string) still excludes the endpoint columns.
            edge_i = [(col, i) for i, col in enumerate(df.columns)
                      if col != source and col != target]
        elif isinstance(edge_attr, (list, tuple)):
            # A list or tuple of column names was requested.
            edge_i = [(col, df.columns.get_loc(col)) for col in edge_attr]
        else:
            # A single string or int column name was passed.
            edge_i = [(edge_attr, df.columns.get_loc(edge_attr))]

        # The graph type cannot change while edges are added, so query once.
        multigraph = g.is_multigraph()

        # Iteration on values returns the rows as Numpy arrays.
        for row in df.values:
            s, t = row[src_i], row[tar_i]
            g.add_edge(s, t)
            if multigraph:
                # default keys just count, so max is most recent
                key = max(g[s][t])
                g[s][t][key].update((attr, row[j]) for attr, j in edge_i)
            else:
                g[s][t].update((attr, row[j]) for attr, j in edge_i)

        return g

    def set_node_attributes(G, values, name=None):
        """Sets node attributes from a given value or dictionary of values.

        Parameters
        ----------
        G : NetworkX Graph

        values : scalar value, dict-like
            What the node attribute should be set to. If `values` is
            not a dictionary, then it is treated as a single attribute value
            that is then applied to every node in `G`. This means that if
            you provide a mutable object, like a list, updates to that
            object will be reflected in the node attribute for each node.
            The attribute name will be `name`.

            If `values` is a dict or a dict of dict, the corresponding
            node's attributes will be updated to `values`. Keys that are
            not nodes of `G` are silently ignored.

        name : string (optional, default=None)
            Name of the node attribute to set if values is a scalar.

        Notes
        -----
        AMO: modified to use the patched ``G.node[]`` accessor rather than
        ``G.nodes[]``, which works only if version >= 2.

        Examples
        --------
        After computing some property of the nodes of a graph, you may want
        to assign a node attribute to store the value of that property for
        each node::

            >>> G = nx.path_graph(3)
            >>> bb = nx.betweenness_centrality(G)
            >>> isinstance(bb, dict)
            True
            >>> set_node_attributes(G, bb, 'betweenness')
            >>> G.node[1]['betweenness']
            1.0

        If you provide a list as the second argument, updates to the list
        will be reflected in the node attribute for each node::

            >>> G = nx.path_graph(3)
            >>> labels = []
            >>> set_node_attributes(G, labels, 'labels')
            >>> labels.append('foo')
            >>> G.node[0]['labels']
            ['foo']
            >>> G.node[1]['labels']
            ['foo']
            >>> G.node[2]['labels']
            ['foo']

        If you provide a dictionary of dictionaries as the second argument,
        the entire dictionary will be used to update node attributes::

            >>> G = nx.path_graph(3)
            >>> attrs = {0: {'attr1': 20, 'attr2': 'nothing'},
            ...          1: {'attr2': 3}}
            >>> set_node_attributes(G, attrs)
            >>> G.node[0]['attr1']
            20
            >>> G.node[0]['attr2']
            'nothing'
            >>> G.node[1]['attr2']
            3
            >>> G.node[2]
            {}

        """
        if name is None:
            # `values` must be a dict of dicts: merge each inner dict into
            # the attributes of the corresponding node.
            for n, d in values.items():
                try:
                    G.node[n].update(d)
                except KeyError:
                    # `n` is not a node of G; skip it.
                    pass
            return

        # `values` must not be a dict of dicts here. Only the `.items()`
        # probe is guarded, so an unrelated AttributeError raised while
        # assigning is not mistaken for "`values` is a scalar".
        try:
            items = values.items()
        except AttributeError:
            # `values` is a constant: share it across every node.
            for n in G:
                G.node[n][name] = values
            return

        # `values` is a dict mapping node -> attribute value.
        for n, v in items:
            try:
                G.node[n][name] = v
            except KeyError:
                # `n` is not a node of G; skip it.
                pass
0 commit comments