forked from vtraag/louvain-igraph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
481 lines (390 loc) · 17.9 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
import sys
import igraph as _ig
from . import _c_louvain
from ._c_louvain import ALL_COMMS
from ._c_louvain import ALL_NEIGH_COMMS
from ._c_louvain import RAND_COMM
from ._c_louvain import RAND_NEIGH_COMM
from collections import Counter
# Check if working with Python 3
PY3 = (sys.version > '3')
def _get_py_capsule(graph):
if PY3:
return graph.__graph_as_capsule()
else:
return graph.__graph_as_cobject()
from .VertexPartition import *
from .Optimiser import *
def find_partition(graph, partition_type, initial_membership=None, weights=None, seed=None, **kwargs):
""" Detect communities using the default settings.
This function detects communities given the specified method in the
``partition_type``. This should be type derived from
:class:`VertexPartition.MutableVertexPartition`, e.g.
:class:`ModularityVertexPartition` or :class:`CPMVertexPartition`. Optionally
an initial membership and edge weights can be provided. Remaining
``**kwargs`` are passed on to the constructor of the ``partition_type``,
including for example a ``resolution_parameter``.
Parameters
----------
graph : :class:`ig.Graph`
The graph for which to detect communities.
partition_type : type of :class:`
The type of partition to use for optimisation.
initial_membership : list of int
Initial membership for the partition. If :obj:`None` then defaults to a
singleton partition.
weights : list of double, or edge attribute
Weights of edges. Can be either an iterable or an edge attribute.
seed : int
Seed for the random number generator. By default uses a random seed
if nothing is specified.
**kwargs
Remaining keyword arguments, passed on to constructor of
``partition_type``.
Returns
-------
partition
The optimised partition.
See Also
--------
:func:`Optimiser.optimise_partition`
Examples
--------
>>> G = ig.Graph.Famous('Zachary')
>>> partition = louvain.find_partition(G, louvain.ModularityVertexPartition)
"""
if not weights is None:
kwargs['weights'] = weights
partition = partition_type(graph,
initial_membership=initial_membership,
**kwargs)
optimiser = Optimiser()
if (not seed is None):
optimiser.set_rng_seed(seed)
optimiser.optimise_partition(partition)
return partition
def find_partition_multiplex(graphs, partition_type, seed=None, **kwargs):
""" Detect communities for multiplex graphs.
Each graph should be defined on the same set of vertices, only the edges may
differ for different graphs. See
:func:`Optimiser.optimise_partition_multiplex` for a more detailed
explanation.
Parameters
----------
graphs : list of :class:`ig.Graph`
List of :class:`louvain.VertexPartition` layers to optimise.
partition_type : type of :class:`MutableVertexPartition`
The type of partition to use for optimisation (identical for all graphs).
seed : int
Seed for the random number generator. By default uses a random seed
if nothing is specified.
**kwargs
Remaining keyword arguments, passed on to constructor of ``partition_type``.
Returns
-------
list of int
membership of nodes.
float
Improvement in quality of combined partitions, see
:func:`Optimiser.optimise_partition_multiplex`.
Notes
-----
We don't return a partition in this case because a partition is always
defined on a single graph. We therefore simply return the membership (which
is the same for all layers).
See Also
--------
:func:`Optimiser.optimise_partition_multiplex`
:func:`slices_to_layers`
Examples
--------
>>> n = 100
>>> G_1 = ig.Graph.Lattice([n], 1)
>>> G_2 = ig.Graph.Lattice([n], 1)
>>> membership, improvement = louvain.find_partition_multiplex([G_1, G_2],
... louvain.ModularityVertexPartition)
"""
n_layers = len(graphs)
partitions = []
layer_weights = [1]*n_layers
for graph in graphs:
partitions.append(partition_type(graph, **kwargs))
optimiser = Optimiser()
if (not seed is None):
optimiser.set_rng_seed(seed)
improvement = optimiser.optimise_partition_multiplex(partitions, layer_weights)
return partitions[0].membership, improvement
def find_partition_temporal(graphs, partition_type,
interslice_weight=1,
slice_attr='slice', vertex_id_attr='id',
edge_type_attr='type', weight_attr='weight',
seed=None,
**kwargs):
""" Detect communities for temporal graphs.
Each graph is considered to represent a time slice and does not necessarily
need to be defined on the same set of vertices. Nodes in two consecutive
slices are identified on the basis of the ``vertex_id_attr``, i.e. if two
nodes in two consecutive slices have an identical value of the
``vertex_id_attr`` they are coupled. The ``vertex_id_attr`` should hence be
unique in each slice. The nodes are then coupled with a weight of
``interslice_weight`` which is set in the edge attribute ``weight_attr``. No
weight is set if the ``interslice_weight`` is None (i.e. corresponding in
practice with a weight of 1). See :func:`time_slices_to_layers` for
a more detailed explanation.
Parameters
----------
graphs : list of :class:`ig.Graph`
List of :class:`louvain.VertexPartition` layers to optimise.
partition_type : type of :class:`VertexPartition.MutableVertexPartition`
The type of partition to use for optimisation (identical for all graphs).
interslice_weight : float
The weight of the coupling between two consecutive time slices.
slice_attr : string
The vertex attribute to use for indicating the slice of a node.
vertex_id_attr : string
The vertex to use to identify nodes.
edge_type_attr : string
The edge attribute to use for indicating the type of link (`interslice` or
`intraslice`).
weight_attr : string
The edge attribute used to indicate the weight.
seed : int
Seed for the random number generator. By default uses a random seed
if nothing is specified.
**kwargs
Remaining keyword arguments, passed on to constructor of
``partition_type``.
Returns
-------
list of membership
list containing for each slice the membership vector.
float
Improvement in quality of combined partitions, see
:func:`Optimiser.optimise_partition_multiplex`.
See Also
--------
:func:`time_slices_to_layers`
:func:`slices_to_layers`
Examples
--------
>>> n = 100
>>> G_1 = ig.Graph.Lattice([n], 1)
>>> G_1.vs['id'] = range(n)
>>> G_2 = ig.Graph.Lattice([n], 1)
>>> G_2.vs['id'] = range(n)
>>> membership, improvement = louvain.find_partition_temporal([G_1, G_2],
... louvain.ModularityVertexPartition,
... interslice_weight=1)
"""
# Create layers
G_layers, G_interslice, G = time_slices_to_layers(graphs,
interslice_weight,
slice_attr=slice_attr,
vertex_id_attr=vertex_id_attr,
edge_type_attr=edge_type_attr,
weight_attr=weight_attr)
# Optimise partitions
arg_dict = {}
if 'node_sizes' in partition_type.__init__.__code__.co_varnames:
arg_dict['node_sizes'] = 'node_size'
if 'weights' in partition_type.__init__.__code__.co_varnames:
arg_dict['weights'] = 'weight'
arg_dict.update(kwargs)
partitions = []
for H in G_layers:
arg_dict['graph'] = H
partitions.append(partition_type(**arg_dict))
# We can always take the same interslice partition, as this should have no
# cost in the optimisation.
partition_interslice = CPMVertexPartition(G_interslice, resolution_parameter=0,
node_sizes='node_size', weights=weight_attr)
optimiser = Optimiser()
if (not seed is None):
optimiser.set_rng_seed(seed)
improvement = optimiser.optimise_partition_multiplex(partitions + [partition_interslice])
# Transform results back into original form.
membership = {(v[slice_attr], v[vertex_id_attr]): m for v, m in zip(G.vs, partitions[0].membership)}
membership_time_slices = []
for slice_idx, H in enumerate(graphs):
membership_slice = [membership[(slice_idx, v[vertex_id_attr])] for v in H.vs]
membership_time_slices.append(list(membership_slice))
return membership_time_slices, improvement
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# These are helper functions to create a proper
# disjoint union in python. The igraph implementation
# currently does not keep the attributes when creating
# a disjoint_union.
def get_attrs_or_nones(seq, attr_name):
try:
return seq[attr_name]
except KeyError:
return [None] * len(seq)
def disjoint_union_attrs(graphs):
G = _ig.Graph.disjoint_union(graphs[0], graphs[1:])
vertex_attributes = set(sum([H.vertex_attributes() for H in graphs], []))
edge_attributes = set(sum([H.edge_attributes() for H in graphs], []))
for attr in vertex_attributes:
attr_value = sum([get_attrs_or_nones(H.vs, attr) for H in graphs], [])
G.vs[attr] = attr_value
for attr in edge_attributes:
attr_value = sum([get_attrs_or_nones(H.es, attr) for H in graphs], [])
G.es[attr] = attr_value
return G
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Conversion to layer graphs
def time_slices_to_layers(graphs,
interslice_weight=1,
slice_attr='slice',
vertex_id_attr='id',
edge_type_attr='type',
weight_attr='weight'):
""" Convert time slices to layer graphs.
Each graph is considered to represent a time slice. This function simply
connects all the consecutive slices (i.e. the slice graph) with an
``interslice_weight``. The further conversion is then delegated to
:func:`slices_to_layers`, which also provides further details.
See Also
--------
:func:`find_partition_temporal`
:func:`slices_to_layers`
"""
G_slices = _ig.Graph.Tree(len(graphs), 1, mode=_ig.TREE_UNDIRECTED)
G_slices.es[weight_attr] = interslice_weight
G_slices.vs[slice_attr] = graphs
return slices_to_layers(G_slices,
slice_attr,
vertex_id_attr,
edge_type_attr,
weight_attr)
def slices_to_layers(G_coupling,
slice_attr='slice',
vertex_id_attr='id',
edge_type_attr='type',
weight_attr='weight'):
""" Convert a coupling graph of slices to layers of graphs.
This function converts a graph of slices to layers so that they can be used
with this package. This function assumes that the slices are represented by
nodes in ``G_coupling``, and stored in the attribute ``slice_attr``. In other
words, ``G_coupling.vs[slice_attr]`` should contain :class:`ig.Graph` s . The
slices will be converted to layers, and nodes in different slices will be
coupled if the two slices are connected in ``G_coupling``. Nodes in two
connected slices are identified on the basis of the ``vertex_id_attr``, i.e.
if two nodes in two connected slices have an identical value of the
``vertex_id_attr`` they will be coupled. The ``vertex_id_attr`` should hence
be unique in each slice. The weight of the coupling is determined by the
weight of this link in ``G_coupling``, as determined by the ``weight_attr``.
Parameters
----------
G_coupling : :class:`ig.Graph`
The graph connecting the different slices.
slice_attr : string
The vertex attribute which contains the slices.
vertex_id_attr : string
The vertex attribute which is used to identify whether two nodes in two
slices represent the same node, and hence, should be coupled.
edge_type_attr : string
The edge attribute to use for indicating the type of link (``interslice``
or ``intraslice``).
weight_attr : string
The edge attribute used to indicate the (coupling) weight.
Returns
-------
G_layers : list of :class:`ig.Graph`
A list of slices converted to layers.
G_interslice : :class:`ig.Graph`
The interslice coupling layer.
G : :class:`ig.Graph`
The complete graph containing all layers and interslice couplings.
Notes
-----
The distinction between slices and layers is not easy to grasp. Slices in
this context refer to graphs that somehow represents different aspects of a
network. The simplest example is probably slices that represents time: there
are different snapshots network across time, and each snapshot is considered
a slice. Some nodes may drop out of the network over time, while others enter
the network. Edges may change over time, or the weight of the links may
change over time. This is just the simplest example of a slice, and there may
be different, more complex possibilities. Below an example with three time
slices:
.. image:: figures/slices.png
Now in order to optimise partitions across these different slices, we
represent them slightly differently, namely as layers. The idea of layers is
that all graphs always are defined on the same set of nodes, and that only
the links differ for different layers. We thus create new nodes as
combinations of original nodes and slices. For example, if node 1 existed in
both slice 1 and in slice 2, we will thus create two nodes to build the
layers: a node 1-1 and a node 1-2. Additionally, if the slices are connected
in the slice graph, the two nodes would also be connected, so there would be
a linke between node 1-1 and 1-2. Different slices will then correspond to
different layers: each layer only contains the link for that particular
slice. In addition, for methods such as :class:`CPMVertexPartition`,
so-called ``node_sizes`` are required, and for them to properly function,
they should be set to 0 (which is handled appropriately in this function, and
stored in the vertex attribute ``node_size``). We thus obtain equally many
layers as we have slices, and we need one more layer for representing the
interslice couplings. For the example provided above, we thus obtain the
following:
.. image:: figures/layers_separate.png
The idea of doing community detection with slices is further detailed in [1].
References
----------
.. [1] Mucha, P. J., Richardson, T., Macon, K., Porter, M. A., & Onnela,
J.-P. (2010). Community structure in time-dependent, multiscale, and
multiplex networks. Science, 328(5980), 876-8.
`10.1126/science.1184819 <http://doi.org/10.1126/science.1184819>`_
See Also
--------
:func:`find_partition_temporal`
:func:`time_slices_to_layers`
"""
if not slice_attr in G_coupling.vertex_attributes():
raise ValueError("Could not find the vertex attribute {0} in the coupling graph.".format(slice_attr))
if not weight_attr in G_coupling.edge_attributes():
raise ValueError("Could not find the edge attribute {0} in the coupling graph.".format(weight_attr))
# Create disjoint union of the time graphs
for v_slice in G_coupling.vs:
H = v_slice[slice_attr]
H.vs[slice_attr] = v_slice.index
if not vertex_id_attr in H.vertex_attributes():
raise ValueError("Could not find the vertex attribute {0} to identify nodes in different slices.".format(vertex_id_attr ))
if not weight_attr in H.edge_attributes():
H.es[weight_attr] = 1
G = disjoint_union_attrs(G_coupling.vs[slice_attr])
G.es[edge_type_attr] = 'intraslice'
for v_slice in G_coupling.vs:
for u_slice in v_slice.neighbors(mode=_ig.OUT):
if v_slice.index < u_slice.index or G_coupling.is_directed():
nodes_v = G.vs.select(lambda v: v[slice_attr]==v_slice.index)[vertex_id_attr]
if len(set(nodes_v)) != len(nodes_v):
err = '\n'.join(
['\t{0} {1} times'.format(item, count) for item, count in Counter(nodes_v).items() if count > 1]
)
raise ValueError('No unique IDs for slice {0}, require unique IDs:\n{1}'.format(v_slice.index, err))
nodes_u = G.vs.select(lambda v: v[slice_attr]==u_slice.index)[vertex_id_attr]
if len(set(nodes_u)) != len(nodes_u):
err = '\n'.join(
['\t{0} {1} times'.format(item, count) for item, count in Counter(nodes_u).items() if count > 1]
)
raise ValueError('No unique IDs for slice {0}, require unique IDs:\n{1}'.format(u_slice.index, err))
common_nodes = set(nodes_v).intersection(set(nodes_u))
nodes_v = sorted([v for v in G.vs if v[slice_attr] == v_slice.index and v[vertex_id_attr] in common_nodes], key=lambda v: v[vertex_id_attr])
nodes_u = sorted([v for v in G.vs if v[slice_attr] == u_slice.index and v[vertex_id_attr] in common_nodes], key=lambda v: v[vertex_id_attr])
edges = zip(nodes_v, nodes_u)
e_start = G.ecount()
G.add_edges(edges)
e_end = G.ecount()
e_idx = range(e_start, e_end)
interslice_weight = G_coupling.es[G_coupling.get_eid(v_slice, u_slice)][weight_attr]
if not interslice_weight is None:
G.es[e_idx][weight_attr] = interslice_weight
G.es[e_idx][edge_type_attr] = 'interslice'
# Convert aggregate graph to individual layers for each time slice.
G_layers = [None]*G_coupling.vcount()
for v_slice in G_coupling.vs:
H = G.subgraph_edges(G.es.select(_within=[v.index for v in G.vs if v[slice_attr] == v_slice.index]), delete_vertices=False)
H.vs['node_size'] = [1 if v[slice_attr] == v_slice.index else 0 for v in H.vs]
G_layers[v_slice.index] = H
# Create one graph for the interslice links.
G_interslice = G.subgraph_edges(G.es.select(type_eq='interslice'), delete_vertices=False)
G_interslice.vs['node_size'] = 0
return G_layers, G_interslice, G