forked from keon/algorithms
-
Notifications
You must be signed in to change notification settings - Fork 2
/
set_covering.py
115 lines (93 loc) · 3.6 KB
/
set_covering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from itertools import chain, combinations
"""
Universe *U* of n elements
Collection of subsets of U:
S = S1,S2...,Sm
Where every substet Si has an associated cost.
Find a minimum cost subcollection of S that covers all elements of U
Example:
U = {1,2,3,4,5}
S = {S1,S2,S3}
S1 = {4,1,3}, Cost(S1) = 5
S2 = {2,5}, Cost(S2) = 10
S3 = {1,4,3,2}, Cost(S3) = 3
Output:
Set cover = {S2, S3}
Min Cost = 13
"""
def powerset(iterable):
"""Calculate the powerset of any iterable.
For a range of integers up to the length of the given list,
make all possible combinations and chain them together as one object.
From https://docs.python.org/3/library/itertools.html#itertools-recipes
"""
"list(powerset([1,2,3])) --> [(), (1,), (2,), (3,), (1,2), (1,3), (2,3), (1,2,3)]"
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
def optimal_set_cover(universe, subsets, costs):
""" Optimal algorithm - DONT USE ON BIG INPUTS - O(2^n) complexity!
Finds the minimum cost subcollection os S that covers all elements of U
Args:
universe (list): Universe of elements
subsets (dict): Subsets of U {S1:elements,S2:elements}
costs (dict): Costs of each subset in S - {S1:cost, S2:cost...}
"""
pset = powerset(subsets.keys())
best_set = None
best_cost = float("inf")
for subset in pset:
covered = set()
cost = 0
for s in subset:
covered.update(subsets[s])
cost += costs[s]
if len(covered) == len(universe) and cost < best_cost:
best_set = subset
best_cost = cost
return best_set
def greedy_set_cover(universe, subsets, costs):
"""Approximate greedy algorithm for set-covering. Can be used on large
inputs - though not an optimal solution.
Args:
universe (list): Universe of elements
subsets (dict): Subsets of U {S1:elements,S2:elements}
costs (dict): Costs of each subset in S - {S1:cost, S2:cost...}
"""
elements = set(e for s in subsets.keys() for e in subsets[s])
# elements don't cover universe -> invalid input for set cover
if elements != universe:
return None
# track elements of universe covered
covered = set()
cover_sets = []
while covered != universe:
min_cost_elem_ratio = float("inf")
min_set = None
# find set with minimum cost:elements_added ratio
for s, elements in subsets.items():
new_elements = len(elements - covered)
# set may have same elements as already covered -> new_elements = 0
# check to avoid division by 0 error
if new_elements != 0:
cost_elem_ratio = costs[s] / new_elements
if cost_elem_ratio < min_cost_elem_ratio:
min_cost_elem_ratio = cost_elem_ratio
min_set = s
cover_sets.append(min_set)
# union
covered |= subsets[min_set]
return cover_sets
if __name__ == '__main__':
universe = {1, 2, 3, 4, 5}
subsets = {'S1': {4, 1, 3}, 'S2': {2, 5}, 'S3': {1, 4, 3, 2}}
costs = {'S1': 5, 'S2': 10, 'S3': 3}
optimal_cover = optimal_set_cover(universe, subsets, costs)
optimal_cost = sum(costs[s] for s in optimal_cover)
greedy_cover = greedy_set_cover(universe, subsets, costs)
greedy_cost = sum(costs[s] for s in greedy_cover)
print('Optimal Set Cover:')
print(optimal_cover)
print('Cost = %s' % optimal_cost)
print('Greedy Set Cover:')
print(greedy_cover)
print('Cost = %s' % greedy_cost)