forked from vtraag/louvain-igraph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Optimiser.py
424 lines (359 loc) · 16.4 KB
/
Optimiser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
from . import _c_louvain
from .VertexPartition import LinearResolutionParameterVertexPartition
from collections import namedtuple
from math import log, sqrt
import sys
# Check if working with Python 3
PY3 = (sys.version > '3')
class Optimiser(object):
""" Class for doing community detection using the Louvain algorithm.
The optimiser class provides a number of different methods for optimising a
given partition. The overall optimise procedure :func:`optimise_partition`
calls :func:`move_nodes` then aggregates the graph and repeats the same
procedure. For calculating the actual improvement of moving a node
(corresponding a subset of nodes in the aggregate graph), the code relies on
:func:`~VertexPartition.MutableVertexPartition.diff_move` which provides
different values for different methods (e.g. modularity or CPM). Finally, the
Optimiser class provides a routine to construct a :func:`resolution_profile`
on a resolution parameter.
"""
def __init__(self):
""" Create a new Optimiser object """
self._optimiser = _c_louvain._new_Optimiser()
#########################################################3
# consider_comms
@property
def consider_comms(self):
""" Determine how alternative communities are considered for moving
a node for *optimising* a partition.
Nodes will only move to alternative communities that improve the given
quality function.
Notes
-------
This attribute should be set to one of the following values
* :attr:`louvain.ALL_NEIGH_COMMS`
Consider all neighbouring communities for moving.
* :attr:`louvain.ALL_COMMS`
Consider all communities for moving. This is especially useful in the
case of negative links, in which case it may be better to move a node to
a non-neighbouring community.
* :attr:`louvain.RAND_NEIGH_COMM`
Consider a random neighbour community for moving. The probability to
choose a community is proportional to the number of neighbours a node has
in that community.
* :attr:`louvain.RAND_COMM`
Consider a random community for moving. The probability to choose a
community is proportional to the number of nodes in that community.
"""
return _c_louvain._Optimiser_get_consider_comms(self._optimiser)
@consider_comms.setter
def consider_comms(self, value):
_c_louvain._Optimiser_set_consider_comms(self._optimiser, value)
##########################################################
# consider_empty_community
@property
def consider_empty_community(self):
""" boolean: if ``True`` consider also moving nodes to an empty community
(default). """
return _c_louvain._Optimiser_get_consider_empty_community(self._optimiser)
@consider_empty_community.setter
def consider_empty_community(self, value):
_c_louvain._Optimiser_set_consider_empty_community(self._optimiser, value)
##########################################################
# Set rng seed
def set_rng_seed(self, value):
""" Set the random seed for the random number generator.
Parameters
----------
value
The integer seed used in the random number generator
"""
_c_louvain._Optimiser_set_rng_seed(self._optimiser, value)
def optimise_partition(self, partition):
""" Optimise the given partition.
Parameters
----------
partition
The :class:`~VertexPartition.MutableVertexPartition` to optimise.
Returns
-------
float
Improvement in quality function.
Examples
--------
>>> G = ig.Graph.Famous('Zachary')
>>> optimiser = louvain.Optimiser()
>>> partition = louvain.ModularityVertexPartition(G)
>>> diff = optimiser.optimise_partition(partition)
"""
# Perhaps we
diff = _c_louvain._Optimiser_optimise_partition(self._optimiser, partition._partition)
partition._update_internal_membership()
return diff
def optimise_partition_multiplex(self, partitions, layer_weights=None):
""" Optimise the given partitions simultaneously.
Parameters
----------
partitions
List of :class:`~VertexPartition.MutableVertexPartition` layers to optimise.
layer_weights
List of weights of layers.
Returns
-------
float
Improvement in quality of combined partitions, see `Notes <#notes-multiplex>`_.
.. _notes-multiplex:
Notes
-----
This method assumes that the partitions are defined for graphs with the
same vertices. The connections between the vertices may be different, but
the vertices themselves should be identical. In other words, all vertices
should have identical indices in all graphs (i.e. node `i` is assumed to be
the same node in all graphs). The quality of the overall partition is
simply the sum of the individual qualities for the various partitions,
weighted by the layer_weight. If we denote by :math:`Q_k` the quality of
layer :math:`k` and the weight by :math:`\\lambda_k`, the overall quality
is then
.. math:: Q = \sum_k \\lambda_k Q_k.
This is particularly useful for graphs containing negative links. When
separating the graph in two graphs, the one containing only the positive
links, and the other only the negative link, by supplying a negative weight
to the latter layer, we try to find relatively many positive links within a
community and relatively many negative links between communities. Note that
in this case it may be better to assign a node to a community to which it
is not connected so that :attr:`consider_comms` may be better set to
:attr:`louvain.ALL_COMMS`.
Besides multiplex graphs where each node is assumed to have a single
community, it is also useful in the case of for example multiple time
slices, or in situations where nodes can have different communities in
different slices. The package includes some special helper functions for
using :func:`optimise_partition_multiplex` in such cases, where there is a
conversion required from (time) slices to layers suitable for use in this
function.
See Also
--------
:func:`slices_to_layers`
:func:`time_slices_to_layers`
:func:`find_partition_multiplex`
:func:`find_partition_temporal`
Examples
--------
>>> G_pos = ig.Graph.SBM(100, pref_matrix=[[0.5, 0.1], [0.1, 0.5]], block_sizes=[50, 50])
>>> G_neg = ig.Graph.SBM(100, pref_matrix=[[0.1, 0.5], [0.5, 0.1]], block_sizes=[50, 50])
>>> optimiser = louvain.Optimiser()
>>> partition_pos = louvain.ModularityVertexPartition(G_pos)
>>> partition_neg = louvain.ModularityVertexPartition(G_neg)
>>> diff = optimiser.optimise_partition_multiplex(
... partitions=[partition_pos, partition_neg],
... layer_weights=[1,-1])
"""
if not layer_weights:
layer_weights = [1]*len(partitions)
diff = _c_louvain._Optimiser_optimise_partition_multiplex(
self._optimiser,
[partition._partition for partition in partitions],
layer_weights)
for partition in partitions:
partition._update_internal_membership()
return diff
def move_nodes(self, partition, consider_comms=None):
""" Move nodes to alternative communities for *optimising* the partition.
Parameters
----------
partition
The partition for which to move nodes.
consider_comms
If ``None`` uses :attr:`consider_comms`, but can be set to
something else.
Returns
-------
float
Improvement in quality function.
Notes
-----
When moving nodes, the function loops over nodes and considers moving the
node to an alternative community. Which community depends on
``consider_comms``. The function terminates when no more nodes can be moved
to an alternative community.
Examples
--------
>>> G = ig.Graph.Famous('Zachary')
>>> optimiser = louvain.Optimiser()
>>> partition = louvain.ModularityVertexPartition(G)
>>> diff = optimiser.move_nodes(partition)
"""
if (consider_comms is None):
consider_comms = self.consider_comms
diff = _c_louvain._Optimiser_move_nodes(self._optimiser, partition._partition, consider_comms)
partition._update_internal_membership()
return diff
def resolution_profile(self,
graph,
partition_type,
resolution_range,
weights=None,
bisect_func=lambda p: p.bisect_value(),
min_diff_bisect_value=1,
min_diff_resolution=1e-3,
linear_bisection=False,
number_iterations=1,
**kwargs
):
""" Use bisectioning on the resolution parameter in order to construct a
resolution profile.
Parameters
----------
graph
The graph for which to construct a resolution profile.
partition_type
The type of :class:`~VertexPartition.MutableVertexPartition` used
to find a partition (must support resolution parameters obviously).
resolution_range
The range of resolution values that we would like to scan.
weights
If provided, indicates the edge attribute to use as a weight.
Returns
-------
list of :class:`~VertexPartition.MutableVertexPartition`
A list of partitions for different resolutions.
Other Parameters
----------------
bisect_func
The function used for bisectioning. For the methods currently
implemented, this should usually not be altered.
min_diff_bisect_value
The difference in the value returned by the bisect_func below which the
bisectioning stops (i.e. by default, a difference of a single edge does
not trigger further bisectioning).
min_diff_resolution
The difference in resolution below which the bisectioning stops. For
positive differences, the logarithmic difference is used by default, i.e.
``diff = log(res_1) - log(res_2) = log(res_1/res_2)``, for which ``diff >
min_diff_resolution`` to continue bisectioning. Set the linear_bisection
to true in order to use only linear bisectioning (in the case of negative
resolution parameters for example, which can happen with negative
weights).
linear_bisection
Whether the bisectioning will be done on a linear or on a logarithmic
basis (if possible).
number_iterations
Indicates the number of iterations of the algorithm to run. If negative
(or zero) the algorithm is run until a stable iteration.
Examples
--------
>>> G = ig.Graph.Famous('Zachary')
>>> optimiser = louvain.Optimiser()
>>> profile = optimiser.resolution_profile(G, louvain.CPMVertexPartition,
... resolution_range=(0,1))
"""
# Helper function for cleaning values to be a stepwise function
def clean_stepwise(bisect_values):
# Check best partition for each resolution parameter
for res, bisect in bisect_values.iteritems():
best_bisect = bisect
best_quality = bisect.partition.quality(res)
for res2, bisect2 in bisect_values.iteritems():
if bisect2.partition.quality(res) > best_quality:
best_bisect = bisect2
best_quality = bisect2.partition.quality(res)
if best_bisect != bisect:
bisect_values[res] = best_bisect
# We only need to keep the changes in the bisection values
bisect_list = sorted([(res, part.bisect_value) for res, part in
bisect_values.iteritems()], key=lambda x: x[0])
for (res1, v1), (res2, v2) \
in zip(bisect_list,
bisect_list[1:]):
# If two consecutive bisection values are the same, remove the second
# resolution parameter
if v1 == v2:
del bisect_values[res2]
for res, bisect in bisect_values.iteritems():
bisect.partition.resolution_parameter = res
# We assume here that the bisection values are
# monotonically decreasing with increasing resolution
# parameter values.
def ensure_monotonicity(bisect_values, new_res):
# First check if this partition improves on any other partition
for res, bisect_part in bisect_values.iteritems():
if bisect_values[new_res].partition.quality(res) > bisect_part.partition.quality(res):
bisect_values[res] = bisect_values[new_res]
# Then check what is best partition for the new_res
current_quality = bisect_values[new_res].partition.quality(new_res)
best_res = new_res
for res, bisect_part in bisect_values.iteritems():
if bisect_part.partition.quality(new_res) > current_quality:
best_res = new_res
bisect_values[new_res] = bisect_values[best_res]
def find_partition(self, graph, partition_type, weights=None, **kwargs):
partition = partition_type(graph,
weights=weights,
**kwargs)
n_itr = 0
while self.optimise_partition(partition) > 0 and \
(n_itr < number_iterations or number_iterations <= 0):
n_itr += 1
return partition
assert issubclass(partition_type, LinearResolutionParameterVertexPartition), "Bisectioning only works on partitions with a linear resolution parameter."
# Start actual bisectioning
bisect_values = {}
stack_res_range = []
# Push first range onto the stack
stack_res_range.append(resolution_range)
# Make sure the bisection values are calculated
# The namedtuple we will use in the bisection function
BisectPartition = namedtuple('BisectPartition',
['partition', 'bisect_value'])
partition = find_partition(self, graph=graph, partition_type=partition_type,
weights=weights,resolution_parameter=resolution_range[0],
**kwargs)
bisect_values[resolution_range[0]] = BisectPartition(partition=partition,
bisect_value=bisect_func(partition))
partition = find_partition(self, graph=graph, partition_type=partition_type,
weights=weights, resolution_parameter=resolution_range[1], **kwargs)
bisect_values[resolution_range[1]] = BisectPartition(partition=partition,
bisect_value=bisect_func(partition))
# While stack of ranges not yet empty
while stack_res_range:
# Get the current range from the stack
current_range = stack_res_range.pop()
# Get the difference in bisection values
diff_bisect_value = abs(bisect_values[current_range[0]].bisect_value -
bisect_values[current_range[1]].bisect_value)
# Get the difference in resolution parameter (in log space if 0 is not in
# the interval (assuming only non-negative resolution parameters).
if current_range[0] > 0 and current_range[1] > 0 and not linear_bisection:
diff_resolution = log(current_range[1]/current_range[0])
else:
diff_resolution = abs(current_range[1] - current_range[0])
# Check if we still want to scan a smaller interval
# If we would like to bisect this interval
if diff_bisect_value > min_diff_bisect_value and \
diff_resolution > min_diff_resolution:
# Determine new resolution value
if current_range[0] > 0 and current_range[1] > 0 and not linear_bisection:
new_res = sqrt(current_range[1]*current_range[0])
else:
new_res = sum(current_range)/2.0
# Bisect left (push on stack)
stack_res_range.append((current_range[0], new_res))
# Bisect right (push on stack)
stack_res_range.append((new_res, current_range[1]))
# If we haven't scanned this resolution value yet,
# do so now
if not bisect_values.has_key(new_res):
partition = find_partition(self, graph, partition_type=partition_type,
weights=weights, resolution_parameter=new_res, **kwargs)
bisect_values[new_res] = BisectPartition(partition=partition,
bisect_value=bisect_func(partition))
# Because of stochastic differences in different runs, the monotonicity
# of the bisection values might be violated, so check for any
# inconsistencies
ensure_monotonicity(bisect_values, new_res)
# Ensure we only keep those resolution values for which
# the bisection values actually changed, instead of all of them
clean_stepwise(bisect_values)
# Use an ordered dict so that when iterating over it, the results appear in
# increasing order based on the resolution value.
return sorted((bisect.partition for res, bisect in
bisect_values.iteritems()), key=lambda x: x.resolution_parameter)